Compare commits

...

49 Commits

Author SHA1 Message Date
Ruihang Xia
53a5864944 fix: don't assert path under windows (#2708)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-11-08 03:30:12 +00:00
yuanbohan
5b70881098 feat: specialize Display for String OtlpAnyValue (#2699) 2023-11-08 03:24:13 +00:00
Niwaka
06d273b75a fix: GET: /config return all configurations when running standalone (#2630)
* fix: config api return all configurations when running standalone

* chore: follow same style

* fix: avoid panic
2023-11-08 03:22:19 +00:00
Ruihang Xia
b382900c5c fix: don't assert path under windows
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-11-08 11:14:57 +08:00
Lei, HUANG
c79bb5a936 fix: memtable prune (#2698)
* fix: memtable prune

* test: add memtable prune test case with complex filter

* fix: test format
2023-11-07 06:44:21 +00:00
shuiyisong
7e0dcfc797 chore: export tool minor refactor (#2612)
* chore: cr issue

* chore: cr issue

* chore: skip information schema

* chore: fix clippy

* chore: add basic auth support to cli export

* chore: fix cr issue

* chore: reduce `flush` invocation

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: use BufWriter

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2023-11-06 16:47:27 +00:00
Zhenchi
51ddebdc73 docs: rfc of inverted index for SST file (#2693)
* docs: rfc of inverted index for SST file

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* docs: add row_count_in_group

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* docs: fix spell check

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* docs: update api

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2023-11-06 13:40:28 +00:00
Niwaka
e9f7579091 feat: support region ddl for custom_storage (#2679)
* feat: support region ddl for custom_storage

* fix: typo

* fix: propagate error

* refactor: have manifest_options accept RegionOptions

* chore: improve document
2023-11-06 11:18:47 +00:00
dennis zhuang
f387a09535 fix: refactor and fix to_unixtime (#2695)
* fix: refactor and fix to_unixtime

* fix: sqlness tests

* feat: supports date type

* fix: test

* feat: supports datetime type

* refactor: convert_to_seconds
2023-11-06 11:00:14 +00:00
zyy17
cf94d3295f feat: add '--server-addr' in sqlness runner (#2692)
* feat: add '--server-addr' and '--mode' in sqlness runner

* chore: remove '--mode'

* refactor: add 'connect_db()'
2023-11-06 10:56:04 +00:00
Wei
0a91335e24 feat: decimal128 type in common (#2696)
* feat: decimal type in common

* chore: rename

* feat: decimal128 compare

* chore: clippy

* chore: typos

* chore: cr comment

Co-authored-by: Dennis Zhuang <killme2008@gmail.com>

* chore: cr comment.

Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Co-authored-by: Dennis Zhuang <killme2008@gmail.com>
Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>
2023-11-06 08:04:50 +00:00
Ruihang Xia
6fd04e38a3 feat: implement create region request for metric engine (#2694)
* implement basic put/get/exist interfaces

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* test add_column and add_table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* move engine test to test_util

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* verify incoming create region request

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* create data region

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy lints

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* apply review sugg

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-11-06 03:48:26 +00:00
WU Jingdi
bbaae9223a feat: enable empty range by (#2697) 2023-11-06 03:45:33 +00:00
Weny Xu
060864d0c1 feat: decrease the page size if the response size exceeds the limit (#2689)
* feat: decrease the `page size` if the response message size exceeds the limit

* chore: apply suggestions from CR

* feat: prefer to use adaptive_page_size

* chore: apply suggestions from CR
2023-11-06 03:36:15 +00:00
Yingwen
395632c874 feat(mito): filters memtables by their time ranges (#2686)
* feat: filter memtable by time range

* fix: incorrect time range returned by time series memtable

* test: test memtable pruning
2023-11-03 08:48:21 +00:00
Ruihang Xia
0dca63bc7b feat: Metric engine skeleton (#2687)
* metadata region logic

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix mito2 test feature gate

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* override unused warnings

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add basic test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* apply review sugg

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy lints

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-11-03 08:34:08 +00:00
Ruihang Xia
7323d727c9 feat: support current_timestamp and now as default constrains (#2690)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-11-03 08:32:02 +00:00
yuanbohan
68f92ecf08 feat(otlp): json attributes via impl Serialeze trait (#2685)
* feat: json attributes via impl Serialeze trait

* chore: rename TraceLink to SpanLink

* feat: support serialize null value
2023-11-03 07:27:04 +00:00
Weny Xu
39d52f25bf feat: enable metasrv leader cached kv (#2629)
* feat: enable metasrv leader cached kv

* fix: fix cached kv caching the empty value bug

* chore: apply suggestions from CR

* chore: apply suggestions from CR

* refactor: change DEFAULT_PAGE_SIZE to 1536
2023-11-03 03:56:28 +00:00
Weny Xu
fb8d0c6ce5 refactor: unify the KvBackend (#2684)
* refactor: unify the KvBackend

* refactor: rename kv_store to kv_backend

* chore: apply suggestions from CR

* refactor: rename kv_store to kv_backend

* refactor: rename KvStoreConfig to KvBackendConfig
2023-11-03 02:46:43 +00:00
Weny Xu
ce867fb583 feat: introduce the region lease keeper (#2645)
* feat: introduce the region lease keeper

* chore: apply suggestions from CR

* refactor: simplify `retain_active_regions`

* refactor: remove Default of RegionStat

* chore: add todo comments

* chore: apply suggestions from CR

* refactor: simplify `retain_active_regions`

* fix: fix ci
2023-11-02 12:01:17 +00:00
localhost
04a8fc5138 chore: change admin table api return json format (#2683) 2023-11-02 07:37:57 +00:00
JeremyHi
479ffe5a0f feat: query table meta by ids (#2675)
* feat: add table meta by id

* feat: add help for http api

* chore: by comment

* feat: display for LeaderChangeMessage
2023-11-02 02:45:12 +00:00
yuanbohan
4b48c716b2 fix(OTLP/Trace): use Timestamp instead of Time for json (#2677)
fix: use Timestamp instead of Time for trace time
2023-11-01 09:41:51 +00:00
Ruihang Xia
a9137b77f0 fix: sort condition in HistogramFold (#2674)
* sort ts before le

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add test case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-11-01 08:28:53 +00:00
Yingwen
5f3bbdca4f feat(mito): Reads SST's row groups one by one (#2668)
* feat: read parquet metadata

* feat: add create method for row group

* feat: read parquet by row group

* refactor: use VecDeque to collect batches

* style: fix row group clippy warnings

* chore: update comments

* style: fix clippy

* refactor: simplify row group reader builder

* docs: fix grammar issue

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

* chore: format code

---------

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2023-11-01 03:59:16 +00:00
zyy17
7bd137f398 chore: update greptime-proto to main (#2672) 2023-11-01 03:13:36 +00:00
shuiyisong
15a0775a3c chore: use https when tls is present (#2671)
* chore: use https

* chore: use tls
2023-11-01 02:44:02 +00:00
WU Jingdi
180bc64cb0 feat: change metric crate from metrics to prometheus (#2655)
* feat: change metrics to prometheus

* chore: fix code advise

* chore: resolve merge conflict

* chore: fix code advise
2023-10-31 15:46:57 +00:00
Weny Xu
e3320c531d feat: add leader_status for RegionRoute (#2670) 2023-10-31 09:34:56 +00:00
shuiyisong
d77003fb3b fix: add back mut ref (#2669)
* chore: add back mut ref

* chore: allow clippy complaint
2023-10-31 07:00:42 +00:00
Weny Xu
54ed7529ca feat: introduce the region role (#2640)
* feat: introduce region role

* chore: apply suggestions from CR
2023-10-31 06:59:22 +00:00
Yingwen
465c8f714e feat(mito): avoid buffering all batches for the same primary key (#2658)
* feat: Control merge reader by batch size

* test: test heap have large range

* fix: merge one batch

* test: merge many duplicates

* test: test reheap hot

* feat: don't handle empty batch in merge reader
2023-10-31 06:36:31 +00:00
Weny Xu
88eb69530a refactor: remove router rpc (#2646) 2023-10-31 04:54:56 +00:00
Wei
36c0742c45 feat: region disk usage statistic (#2665)
* feat: region usage

* chore: code comment

* chore: unit test

* feat: region usage for engine

* chore: rename to region usage

* feat: add to heartbeat

* chore: u64 to i64 convert

* chore: cr comment.

Co-authored-by: Yingwen <realevenyag@gmail.com>

* Update src/file-engine/src/engine.rs

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2023-10-31 03:57:37 +00:00
yuanbohan
84bcca9117 fix: events or links to string (#2667) 2023-10-31 02:41:47 +00:00
WU Jingdi
d2f3793d15 fix: explain range query (#2664)
* fix: explain range query

* chore: add comment
2023-10-30 13:21:25 +00:00
Niwaka
000e1471eb feat: make mito2 have ObjectStoreManager(initial) (#2643)
* feat: make mito2 have object_store_manager(initial)

* chore: address review

* refactor: Arc<ObjectStoreManager> to ObjectStoreManagerRef and replace Vec with tuple

* fix: add ObjectStoreManager::from_default

* fix: remove cfg(test)

* fix: remove try_new from ObjectStoreManager
2023-10-30 13:16:04 +00:00
Yingwen
d0ff8ab191 test: remove unstable timezone test (#2666)
test: remove berlin test
2023-10-30 09:17:38 +00:00
Weny Xu
bd177b8cc4 refactor: remove move_value (#2661)
* chore: bump orc-rust to 0.2.42

* refactor: remove move_value
2023-10-27 03:42:50 +00:00
Wei
958ff3f185 feat: estimate region wal size (#2652)
* feat: estimated wal size

* chore: modify factor

* chore: cr comment

* chore: add TODO
2023-10-27 02:46:24 +00:00
Ruihang Xia
5d8b0e8154 chore: bump dependencies (#2659)
* fix all hard error

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix nextest

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* trivial changes

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix order by

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix sql keyword and data type

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix range exec's input partitioning

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix cover input type

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix explain analyze

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update distributed mode sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix lints

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update locks

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* downgrade dlv-list

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-10-26 12:24:14 +00:00
Ning Sun
84490f56b8 fix: remove StatusCode::Unexpected from masked error code set (#2654)
* fix: remove Unexpected status code from masked error code set

* fix: lint issue

* test: revert test for unexpected error
2023-10-26 07:57:23 +00:00
Abhinesh
cb97768004 fix(grammatical): correct typos and improve grammar (#2656)
* Update CONTRIBUTING.md

* docs: Update CONTRIBUTING.md

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2023-10-26 07:30:57 +00:00
Ruihang Xia
f08a35d6b9 feat: implement histogram_quantile in PromQL (#2651)
* add to planner

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl evaluate_array

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* compute quantile

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix required input ordering

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add more tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* todo to fixme

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-10-25 08:19:30 +00:00
zyy17
e8adaaf5f7 ci: add the simple framework of nightly functional tests (#2648) 2023-10-25 14:44:36 +08:00
tison
a63fa76b7b docs: Update README.md (#2653)
OpenDAL has been transferred to the ASF.
2023-10-25 06:28:32 +00:00
localhost
102e4c975d chore: show version report data only once at db startup (#2647) 2023-10-25 03:38:33 +00:00
Yingwen
16a3257ada fix: add serde default to MitoConfig (#2650)
* fix: add serde default to MitoConfig

* docs: update config toml
2023-10-24 11:24:47 +00:00
354 changed files with 10639 additions and 6695 deletions


@@ -0,0 +1,31 @@
name: Deploy GreptimeDB cluster
description: Deploy GreptimeDB cluster on Kubernetes
inputs:
aws-ci-test-bucket:
description: 'AWS S3 bucket name for testing'
required: true
aws-region:
description: 'AWS region for testing'
required: true
data-root:
description: 'Data root for testing'
required: true
aws-access-key-id:
description: 'AWS access key id for testing'
required: true
aws-secret-access-key:
description: 'AWS secret access key for testing'
required: true
runs:
using: composite
steps:
- name: Deploy GreptimeDB by Helm
shell: bash
env:
DATA_ROOT: ${{ inputs.data-root }}
AWS_CI_TEST_BUCKET: ${{ inputs.aws-ci-test-bucket }}
AWS_REGION: ${{ inputs.aws-region }}
AWS_ACCESS_KEY_ID: ${{ inputs.aws-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-access-key }}
run: |
./.github/scripts/deploy-greptimedb.sh

.github/actions/sqlness-test/action.yml vendored Normal file (59 lines)

@@ -0,0 +1,59 @@
name: Run sqlness test
description: Run sqlness test on GreptimeDB
inputs:
aws-ci-test-bucket:
description: 'AWS S3 bucket name for testing'
required: true
aws-region:
description: 'AWS region for testing'
required: true
data-root:
description: 'Data root for testing'
required: true
aws-access-key-id:
description: 'AWS access key id for testing'
required: true
aws-secret-access-key:
description: 'AWS secret access key for testing'
required: true
runs:
using: composite
steps:
- name: Deploy GreptimeDB cluster by Helm
uses: ./.github/actions/deploy-greptimedb
with:
data-root: ${{ inputs.data-root }}
aws-ci-test-bucket: ${{ inputs.aws-ci-test-bucket }}
aws-region: ${{ inputs.aws-region }}
aws-access-key-id: ${{ inputs.aws-access-key-id }}
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
# TODO(zyy17): The following tests will be replaced by the real sqlness test.
- name: Run tests on greptimedb cluster
shell: bash
run: |
mysql -h 127.0.0.1 -P 14002 -e "CREATE TABLE IF NOT EXISTS system_metrics (host VARCHAR(255), idc VARCHAR(255), cpu_util DOUBLE, memory_util DOUBLE, disk_util DOUBLE, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(host, idc), TIME INDEX(ts));" && \
mysql -h 127.0.0.1 -P 14002 -e "SHOW TABLES;"
- name: Run tests on greptimedb cluster that uses S3
shell: bash
run: |
mysql -h 127.0.0.1 -P 24002 -e "CREATE TABLE IF NOT EXISTS system_metrics (host VARCHAR(255), idc VARCHAR(255), cpu_util DOUBLE, memory_util DOUBLE, disk_util DOUBLE, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(host, idc), TIME INDEX(ts));" && \
mysql -h 127.0.0.1 -P 24002 -e "SHOW TABLES;"
- name: Run tests on standalone greptimedb
shell: bash
run: |
mysql -h 127.0.0.1 -P 34002 -e "CREATE TABLE IF NOT EXISTS system_metrics (host VARCHAR(255), idc VARCHAR(255), cpu_util DOUBLE, memory_util DOUBLE, disk_util DOUBLE, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(host, idc), TIME INDEX(ts));" && \
mysql -h 127.0.0.1 -P 34002 -e "SHOW TABLES;"
- name: Clean S3 data
shell: bash
env:
AWS_DEFAULT_REGION: ${{ inputs.aws-region }}
AWS_ACCESS_KEY_ID: ${{ inputs.aws-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-access-key }}
run: |
aws s3 rm s3://${{ inputs.aws-ci-test-bucket }}/${{ inputs.data-root }} --recursive

.github/scripts/deploy-greptimedb.sh vendored Executable file (172 lines)

@@ -0,0 +1,172 @@
#!/usr/bin/env bash
set -e
set -o pipefail
KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.24.0}"
ENABLE_STANDALONE_MODE="${ENABLE_STANDALONE_MODE:-true}"
DEFAULT_INSTALL_NAMESPACE=${DEFAULT_INSTALL_NAMESPACE:-default}
GREPTIMEDB_IMAGE_TAG=${GREPTIMEDB_IMAGE_TAG:-latest}
ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
GREPTIME_CHART="https://greptimeteam.github.io/helm-charts/"
# Create a cluster with 1 control-plane node and 5 workers.
function create_kind_cluster() {
cat <<EOF | kind create cluster --name "${CLUSTER}" --image kindest/node:"$KUBERNETES_VERSION" --config=-
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
- role: worker
- role: worker
- role: worker
- role: worker
EOF
}
# Add greptime Helm chart repo.
function add_greptime_chart() {
helm repo add greptime "$GREPTIME_CHART"
helm repo update
}
# Deploy an etcd cluster with 3 members.
function deploy_etcd_cluster() {
local namespace="$1"
helm install etcd "$ETCD_CHART" \
--set replicaCount=3 \
--set auth.rbac.create=false \
--set auth.rbac.token.enabled=false \
-n "$namespace"
# Wait for etcd cluster to be ready.
kubectl rollout status statefulset/etcd -n "$namespace"
}
# Deploy greptimedb-operator.
function deploy_greptimedb_operator() {
# Use the latest chart and image.
helm install greptimedb-operator greptime/greptimedb-operator \
--set image.tag=latest \
-n "$DEFAULT_INSTALL_NAMESPACE"
# Wait for greptimedb-operator to be ready.
kubectl rollout status deployment/greptimedb-operator -n "$DEFAULT_INSTALL_NAMESPACE"
}
# Deploy greptimedb cluster by using local storage.
# It will expose cluster service ports as '14000', '14001', '14002', '14003' to local access.
function deploy_greptimedb_cluster() {
local cluster_name=$1
local install_namespace=$2
kubectl create ns "$install_namespace"
deploy_etcd_cluster "$install_namespace"
helm install "$cluster_name" greptime/greptimedb-cluster \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
--set meta.etcdEndpoints="etcd.$install_namespace:2379" \
-n "$install_namespace"
# Wait for greptimedb cluster to be ready.
while true; do
PHASE=$(kubectl -n "$install_namespace" get gtc "$cluster_name" -o jsonpath='{.status.clusterPhase}')
if [ "$PHASE" == "Running" ]; then
echo "Cluster is ready"
break
else
echo "Cluster is not ready yet: Current phase: $PHASE"
sleep 5 # wait for 5 seconds before checking again.
fi
done
# Expose greptimedb cluster to local access.
kubectl -n "$install_namespace" port-forward svc/"$cluster_name"-frontend \
14000:4000 \
14001:4001 \
14002:4002 \
14003:4003 > /tmp/connections.out &
}
# Deploy greptimedb cluster by using S3.
# It will expose cluster service ports as '24000', '24001', '24002', '24003' to local access.
function deploy_greptimedb_cluster_with_s3_storage() {
local cluster_name=$1
local install_namespace=$2
kubectl create ns "$install_namespace"
deploy_etcd_cluster "$install_namespace"
helm install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
--set meta.etcdEndpoints="etcd.$install_namespace:2379" \
--set storage.s3.bucket="$AWS_CI_TEST_BUCKET" \
--set storage.s3.region="$AWS_REGION" \
--set storage.s3.root="$DATA_ROOT" \
--set storage.s3.secretName=s3-credentials \
--set storage.credentials.secretName=s3-credentials \
--set storage.credentials.secretCreation.enabled=true \
--set storage.credentials.secretCreation.enableEncryption=false \
--set storage.credentials.secretCreation.data.access-key-id="$AWS_ACCESS_KEY_ID" \
--set storage.credentials.secretCreation.data.secret-access-key="$AWS_SECRET_ACCESS_KEY"
# Wait for greptimedb cluster to be ready.
while true; do
PHASE=$(kubectl -n "$install_namespace" get gtc "$cluster_name" -o jsonpath='{.status.clusterPhase}')
if [ "$PHASE" == "Running" ]; then
echo "Cluster is ready"
break
else
echo "Cluster is not ready yet: Current phase: $PHASE"
sleep 5 # wait for 5 seconds before checking again.
fi
done
# Expose greptimedb cluster to local access.
kubectl -n "$install_namespace" port-forward svc/"$cluster_name"-frontend \
24000:4000 \
24001:4001 \
24002:4002 \
24003:4003 > /tmp/connections.out &
}
# Deploy standalone greptimedb.
# It will expose cluster service ports as '34000', '34001', '34002', '34003' to local access.
function deploy_standalone_greptimedb() {
helm install greptimedb-standalone greptime/greptimedb-standalone \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
-n "$DEFAULT_INSTALL_NAMESPACE"
# Wait for standalone greptimedb to be ready.
kubectl rollout status statefulset/greptimedb-standalone -n "$DEFAULT_INSTALL_NAMESPACE"
# Expose greptimedb to local access.
kubectl -n "$DEFAULT_INSTALL_NAMESPACE" port-forward svc/greptimedb-standalone \
34000:4000 \
34001:4001 \
34002:4002 \
34003:4003 > /tmp/connections.out &
}
# Entrypoint of the script.
function main() {
create_kind_cluster
add_greptime_chart
# Deploy standalone greptimedb in the same K8s.
if [ "$ENABLE_STANDALONE_MODE" == "true" ]; then
deploy_standalone_greptimedb
fi
deploy_greptimedb_operator
deploy_greptimedb_cluster testcluster testcluster
deploy_greptimedb_cluster_with_s3_storage testcluster-s3 testcluster-s3
}
# Usages:
# - Deploy greptimedb cluster: ./deploy-greptimedb.sh
main


@@ -0,0 +1,26 @@
name: Nightly functional tests
on:
schedule:
# At 00:00 on Tuesday.
- cron: '0 0 * * 2'
workflow_dispatch:
jobs:
sqlness-test:
name: Run sqlness test
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Run sqlness test
uses: ./.github/actions/sqlness-test
with:
data-root: sqlness-test
aws-ci-test-bucket: ${{ vars.AWS_CI_TEST_BUCKET }}
aws-region: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
aws-access-key-id: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}


@@ -2,7 +2,7 @@
Thanks a lot for considering contributing to GreptimeDB. We believe people like you would make GreptimeDB a great product. We intend to build a community where individuals can have open talks, show respect for one another, and speak with true ❤️. Meanwhile, we are to keep transparency and make your effort count here.
Read the guidelines, and they can help you get started. Communicate with respect to developers maintaining and developing the project. In return, they should reciprocate that respect by addressing your issue, reviewing changes, as well as helping finalize and merge your pull requests.
Please read the guidelines, and they can help you get started. Communicate with respect to developers maintaining and developing the project. In return, they should reciprocate that respect by addressing your issue, reviewing changes, as well as helping finalize and merge your pull requests.
Follow our [README](https://github.com/GreptimeTeam/greptimedb#readme) to get the whole picture of the project. To learn about the design of GreptimeDB, please refer to the [design docs](https://github.com/GrepTimeTeam/docs).
@@ -21,7 +21,7 @@ Pull requests are great, but we accept all kinds of other help if you like. Such
- Write tutorials or blog posts. Blog, speak about, or create tutorials about one of GreptimeDB's many features. Mention [@greptime](https://twitter.com/greptime) on Twitter and email info@greptime.com so we can give pointers and tips and help you spread the word by promoting your content on Greptime communication channels.
- Improve the documentation. [Submit documentation](http://github.com/greptimeTeam/docs/) updates, enhancements, designs, or bug fixes, and fixing any spelling or grammar errors will be very much appreciated.
- Present at meetups and conferences about your GreptimeDB projects. Your unique challenges and successes in building things with GreptimeDB can provide great speaking material. We'd love to review your talk abstract, so get in touch with us if you'd like some help!
- Submit bug reports. To report a bug or a security issue, you can [open a new GitHub issue](https://github.com/GrepTimeTeam/greptimedb/issues/new).
- Submitting bug reports. To report a bug or a security issue, you can [open a new GitHub issue](https://github.com/GrepTimeTeam/greptimedb/issues/new).
- Speak up feature requests. Send feedback is a great way for us to understand your different use cases of GreptimeDB better. If you want to share your experience with GreptimeDB, or if you want to discuss any ideas, you can start a discussion on [GitHub discussions](https://github.com/GreptimeTeam/greptimedb/discussions), chat with the Greptime team on [Slack](https://greptime.com/slack), or you can tweet [@greptime](https://twitter.com/greptime) on Twitter.
## Code of Conduct
@@ -81,7 +81,7 @@ Now, `pre-commit` will run automatically on `git commit`.
### Title
The titles of pull requests should be prefixed with category names listed in [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0)
like `feat`/`fix`/`docs`, with a concise summary of code change following. DO NOT use last commit message as pull request title.
like `feat`/`fix`/`docs`, with a concise summary of code change following. AVOID using the last commit message as pull request title.
### Description
@@ -100,7 +100,7 @@ of what you were trying to do and what went wrong. You can also reach for help i
## Community
The core team will be thrilled if you participate in any way you like. When you are stuck, try ask for help by filing an issue, with a detailed description of what you were trying to do and what went wrong. If you have any questions or if you would like to get involved in our community, please check out:
The core team will be thrilled if you would like to participate in any way you like. When you are stuck, try to ask for help by filing an issue, with a detailed description of what you were trying to do and what went wrong. If you have any questions or if you would like to get involved in our community, please check out:
- [GreptimeDB Community Slack](https://greptime.com/slack)
- [GreptimeDB Github Discussions](https://github.com/GreptimeTeam/greptimedb/discussions)

Cargo.lock generated (2716 lines)

File diff suppressed because it is too large.


@@ -27,6 +27,7 @@ members = [
"src/common/telemetry",
"src/common/test-util",
"src/common/time",
"src/common/decimal",
"src/common/version",
"src/datanode",
"src/datatypes",
@@ -35,6 +36,7 @@ members = [
"src/log-store",
"src/meta-client",
"src/meta-srv",
"src/metric-engine",
"src/mito2",
"src/object-store",
"src/operator",
@@ -61,36 +63,41 @@ license = "Apache-2.0"
[workspace.dependencies]
aquamarine = "0.3"
arrow = { version = "43.0" }
arrow-array = "43.0"
arrow-flight = "43.0"
arrow-schema = { version = "43.0", features = ["serde"] }
arrow = { version = "47.0" }
arrow-array = "47.0"
arrow-flight = "47.0"
arrow-schema = { version = "47.0", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
bigdecimal = "0.4.2"
chrono = { version = "0.4", features = ["serde"] }
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
derive_builder = "0.12"
etcd-client = "0.11"
etcd-client = "0.12"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "1f1dd532a111e3834cc3019c5605e2993ffb9dc3" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "5da72f1cae6b24315e5afc87520aaf7b4d6bb872" }
humantime-serde = "1.1"
itertools = "0.10"
lazy_static = "1.4"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" }
metrics = "0.20"
moka = "0.12"
once_cell = "1.18"
opentelemetry-proto = { version = "0.2", features = ["gen-tonic", "metrics", "traces"] }
parquet = "43.0"
opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.git", rev = "33841b38dda79b15f2024952be5f32533325ca02", features = [
"gen-tonic",
"metrics",
"trace",
] }
parquet = "47.0"
paste = "1.0"
prost = "0.11"
prometheus = { version = "0.13.3", features = ["process"] }
prost = "0.12"
raft-engine = { git = "https://github.com/tikv/raft-engine.git", rev = "22dfb426cd994602b57725ef080287d3e53db479" }
rand = "0.8"
regex = "1.8"
@@ -99,11 +106,13 @@ reqwest = { version = "0.11", default-features = false, features = [
"rustls-tls-native-roots",
"stream",
] }
rust_decimal = "1.32.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
smallvec = "1"
snafu = { version = "0.7", features = ["backtraces"] }
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "6cf9d23d5b8fbecd65efc1d9afb7e80ad7a424da", features = [
# on branch v0.38.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "0fbae07d0c46dc18e3381c406d8b9b8abef6b1fd", features = [
"visitor",
] }
strum = { version = "0.25", features = ["derive"] }
@@ -111,7 +120,7 @@ tempfile = "3"
tokio = { version = "1.28", features = ["full"] }
tokio-util = { version = "0.7", features = ["io-util", "compat"] }
toml = "0.7"
tonic = { version = "0.9", features = ["tls"] }
tonic = { version = "0.10", features = ["tls"] }
uuid = { version = "1", features = ["serde", "v4", "fast-rng"] }
## workspaces members
api = { path = "src/api" }

View File

@@ -184,6 +184,6 @@ Please refer to [contribution guidelines](CONTRIBUTING.md) for more information.
## Acknowledgement
- GreptimeDB uses [Apache Arrow](https://arrow.apache.org/) as the memory model and [Apache Parquet](https://parquet.apache.org/) as the persistent file format.
- GreptimeDB's query engine is powered by [Apache Arrow DataFusion](https://github.com/apache/arrow-datafusion).
- [OpenDAL](https://github.com/datafuselabs/opendal) from [Datafuse Labs](https://github.com/datafuselabs) gives GreptimeDB a very general and elegant data access abstraction layer.
- GreptimeDBs meta service is based on [etcd](https://etcd.io/).
- [Apache OpenDAL (incubating)](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer.
- GreptimeDB's meta service is based on [etcd](https://etcd.io/).
- GreptimeDB uses [RustPython](https://github.com/RustPython/RustPython) for experimental embedded python scripting.


@@ -101,6 +101,10 @@ auto_flush_interval = "1h"
global_write_buffer_size = "1GB"
# Global write buffer size threshold to reject write requests (default 2G).
global_write_buffer_reject_size = "2GB"
# Cache size for SST metadata (default 128MB). Setting it to 0 to disable the cache.
sst_meta_cache_size = "128MB"
# Cache size for vectors and arrow arrays (default 512MB). Setting it to 0 to disable the cache.
vector_cache_size = "512MB"
# Log options
# [logging]


@@ -0,0 +1,113 @@
---
Feature Name: Inverted Index for SST File
Tracking Issue: TBD
Date: 2023-11-03
Author: "Zhong Zhenchi <zhongzc_arch@outlook.com>"
---
# Summary
This RFC proposes an optimization to the storage engine: an inverted index aimed at speeding up label selection queries on Metrics, with tag columns as the optimization target.
# Introduction
In the current system setup, in the Mito Engine, the first column of Primary Keys has a Min-Max index, which significantly optimizes the outcome. However, there are limitations when it comes to other columns, primarily tags. This RFC suggests the implementation of an inverted index to provide enhanced filtering benefits to bridge these limitations and improve overall system performance.
# Design Detail
## Inverted Index
The primary aim of the proposed inverted index is to optimize tag columns in the SST Parquet Files within the Mito Engine. The mapping and construction of an inverted index, from Tag Values to Row Groups, enables efficient logical structures that provide faster and more flexible queries.
When scanning SST Files, pushed-down filters are applied to the respective Tag's inverted index to determine the final Row Groups to be scanned, further bolstering the speed and efficiency of data retrieval.
## Index Format
The Inverted Index for each SST file follows the format shown below:
```
inverted_index₀ inverted_index₁ ... inverted_indexₙ footer
```
The structure inside each Inverted Index is as follows:
```
bitmap₀ bitmap₁ bitmap₂ ... bitmapₙ null_bitmap fst
```
The format is encapsulated by a footer:
```
footer_payload footer_payload_size
```
The `footer_payload` is presented in protobuf encoding of `InvertedIndexFooter`.
The complete format is containerized in [Puffin](https://iceberg.apache.org/puffin-spec/) with the type defined as `greptime-inverted-index-v1`.
## Protobuf Details
The `InvertedIndexFooter` is defined in the following protobuf structure:
```protobuf
message InvertedIndexFooter {
repeated InvertedIndexMeta metas;
}
message InvertedIndexMeta {
string name;
uint64 row_count_in_group;
uint64 fst_offset;
uint64 fst_size;
uint64 null_bitmap_offset;
uint64 null_bitmap_size;
InvertedIndexStats stats;
}
message InvertedIndexStats {
uint64 null_count;
uint64 distinct_count;
bytes min_value;
bytes max_value;
}
```
## Bitmap
Bitmaps are used to represent indices of fixed-size groups. Rows are divided into groups of a fixed size, defined in the `InvertedIndexMeta` as `row_count_in_group`.
For example, when `row_count_in_group` is `4096`, it means each group has `4096` rows. If there are a total of `10000` rows, there will be `3` groups in total. The first two groups will have `4096` rows each, and the last group will have `1808` rows. If the indexed values are found in row `200` and `9000`, they will correspond to groups `0` and `2`, respectively. Therefore, the bitmap should show `0` and `2`.
Bitmap is implemented using [BitVec](https://docs.rs/bitvec/latest/bitvec/), selected due to its efficient representation of dense data arrays typical of indices of groups.
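As an illustrative sketch (not part of the RFC; `mark_groups` is a hypothetical helper, and a plain `Vec<bool>` stands in for `BitVec`), the grouping rule above can be expressed as:
```rust
// Hypothetical helper illustrating the grouping rule above; not part of the RFC's API.
// Rows are divided into fixed-size groups; a group's bit is set if any indexed row falls in it.
fn mark_groups(row_ids: &[u64], row_count_in_group: u64, total_rows: u64) -> Vec<bool> {
    let group_count = ((total_rows + row_count_in_group - 1) / row_count_in_group) as usize;
    let mut bitmap = vec![false; group_count];
    for &row in row_ids {
        // A row belongs to group `row / row_count_in_group`.
        bitmap[(row / row_count_in_group) as usize] = true;
    }
    bitmap
}

fn main() {
    // 10000 rows with row_count_in_group = 4096 -> 3 groups; rows 200 and 9000 set groups 0 and 2.
    assert_eq!(mark_groups(&[200, 9000], 4096, 10_000), vec![true, false, true]);
}
```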
## Finite State Transducer (FST)
[FST](https://docs.rs/fst/latest/fst/) is a highly efficient data structure ideal for in-memory indexing. It represents ordered sets or maps where the keys are bytes. The choice of the FST effectively balances the need for performance, space efficiency, and the ability to perform complex analyses such as regular expression matching.
The conventional usage of FST and `u64` values has been adapted to facilitate indirect indexing to row groups. As the row groups are represented as Bitmaps, we utilize the `u64` values split into bitmap's offset (higher 32 bits) and size (lower 32 bits) to represent the location of these Bitmaps.
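An illustrative sketch of this encoding (the function names are hypothetical; the bit split follows the description above):
```rust
// Illustrative only: the FST value packs the bitmap's offset into the high 32 bits
// and its size into the low 32 bits, as described above.
fn pack_bitmap_location(offset: u32, size: u32) -> u64 {
    ((offset as u64) << 32) | size as u64
}

fn unpack_bitmap_location(value: u64) -> (u32, u32) {
    ((value >> 32) as u32, value as u32)
}

fn main() {
    let value = pack_bitmap_location(1024, 64);
    assert_eq!(unpack_bitmap_location(value), (1024, 64));
}
```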
## API Design
Two APIs are designed: `InvertedIndexBuilder` for building indexes and `InvertedIndexSearcher` for querying them:
```rust
type Bytes = Vec<u8>;
type GroupId = u64;
trait InvertedIndexBuilder {
fn add(&mut self, name: &str, value: Option<&Bytes>, group_id: GroupId) -> Result<()>;
fn finish(&mut self) -> Result<()>;
}
enum Predicate {
Gt(Bytes),
GtEq(Bytes),
Lt(Bytes),
LtEq(Bytes),
InList(Vec<Bytes>),
RegexMatch(String),
}
trait InvertedIndexSearcher {
fn search(&mut self, name: &str, predicates: &[Predicate]) -> Result<impl IntoIterator<GroupId>>;
}
```


@@ -28,7 +28,12 @@ pub type Result<T> = std::result::Result<T, Error>;
#[stack_trace_debug]
pub enum Error {
#[snafu(display("Unknown proto column datatype: {}", datatype))]
UnknownColumnDataType { datatype: i32, location: Location },
UnknownColumnDataType {
datatype: i32,
location: Location,
#[snafu(source)]
error: prost::DecodeError,
},
#[snafu(display("Failed to create column datatype from {:?}", from))]
IntoColumnDataType {


@@ -50,7 +50,7 @@ pub struct ColumnDataTypeWrapper(ColumnDataType);
impl ColumnDataTypeWrapper {
pub fn try_new(datatype: i32) -> Result<Self> {
let datatype = ColumnDataType::from_i32(datatype)
let datatype = ColumnDataType::try_from(datatype)
.context(error::UnknownColumnDataTypeSnafu { datatype })?;
Ok(Self(datatype))
}
@@ -705,7 +705,7 @@ pub fn is_semantic_type_eq(type_value: i32, semantic_type: SemanticType) -> bool
/// Returns true if the pb type value is valid.
pub fn is_column_type_value_eq(type_value: i32, expect_type: &ConcreteDataType) -> bool {
let Some(column_type) = ColumnDataType::from_i32(type_value) else {
let Ok(column_type) = ColumnDataType::try_from(type_value) else {
return false;
};


@@ -30,10 +30,10 @@ futures = "0.3"
futures-util.workspace = true
lazy_static.workspace = true
meta-client = { workspace = true }
metrics.workspace = true
moka = { workspace = true, features = ["future"] }
parking_lot = "0.12"
partition.workspace = true
prometheus.workspace = true
regex.workspace = true
serde.workspace = true
serde_json = "1.0"


@@ -25,11 +25,10 @@ use common_meta::kv_backend::{KvBackend, KvBackendRef, TxnService};
use common_meta::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
DeleteRangeResponse, MoveValueRequest, MoveValueResponse, PutRequest, PutResponse,
RangeRequest, RangeResponse,
DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
};
use common_meta::rpc::KeyValue;
use common_telemetry::{debug, timer};
use common_telemetry::debug;
use meta_client::client::MetaClient;
use moka::future::{Cache, CacheBuilder};
use snafu::{OptionExt, ResultExt};
@@ -152,25 +151,11 @@ impl KvBackend for CachedMetaKvBackend {
}
}
async fn move_value(&self, req: MoveValueRequest) -> Result<MoveValueResponse> {
let from_key = &req.from_key.clone();
let to_key = &req.to_key.clone();
let ret = self.kv_backend.move_value(req).await;
if ret.is_ok() {
self.invalidate_key(from_key).await;
self.invalidate_key(to_key).await;
}
ret
}
async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
let _timer = timer!(METRIC_CATALOG_KV_GET);
let _timer = METRIC_CATALOG_KV_GET.start_timer();
let init = async {
let _timer = timer!(METRIC_CATALOG_KV_REMOTE_GET);
let _timer = METRIC_CATALOG_KV_REMOTE_GET.start_timer();
self.kv_backend.get(key).await.map(|val| {
val.with_context(|| CacheNotGetSnafu {
key: String::from_utf8_lossy(key),
@@ -319,14 +304,6 @@ impl KvBackend for MetaKvBackend {
.context(ExternalSnafu)
}
async fn move_value(&self, req: MoveValueRequest) -> Result<MoveValueResponse> {
self.client
.move_value(req)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)
}
fn as_any(&self) -> &dyn Any {
self
}


@@ -17,8 +17,8 @@ use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::{Arc, RwLock, Weak};
use common_catalog::build_db_string;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME};
use metrics::{decrement_gauge, increment_gauge};
use snafu::OptionExt;
use table::TableRef;
@@ -166,7 +166,7 @@ impl MemoryCatalogManager {
let arc_self = Arc::new(self.clone());
let catalog = arc_self.create_catalog_entry(name);
e.insert(catalog);
increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_CATALOG_COUNT, 1.0);
crate::metrics::METRIC_CATALOG_MANAGER_CATALOG_COUNT.inc();
Ok(true)
}
Entry::Occupied(_) => Ok(false),
@@ -187,11 +187,9 @@ impl MemoryCatalogManager {
})?;
let result = schema.remove(&request.table_name);
if result.is_some() {
decrement_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
1.0,
&[crate::metrics::db_label(&request.catalog, &request.schema)],
);
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT
.with_label_values(&[build_db_string(&request.catalog, &request.schema).as_str()])
.dec();
}
Ok(())
}
@@ -210,7 +208,7 @@ impl MemoryCatalogManager {
match catalog.entry(request.schema) {
Entry::Vacant(e) => {
e.insert(HashMap::new());
increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_SCHEMA_COUNT, 1.0);
crate::metrics::METRIC_CATALOG_MANAGER_SCHEMA_COUNT.inc();
Ok(true)
}
Entry::Occupied(_) => Ok(false),
@@ -238,11 +236,9 @@ impl MemoryCatalogManager {
.fail();
}
schema.insert(request.table_name, request.table);
increment_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
1.0,
&[crate::metrics::db_label(&request.catalog, &request.schema)],
);
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT
.with_label_values(&[build_db_string(&request.catalog, &request.schema).as_str()])
.inc();
Ok(true)
}


@@ -12,18 +12,24 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_catalog::build_db_string;
pub(crate) const METRIC_DB_LABEL: &str = "db";
pub(crate) const METRIC_CATALOG_MANAGER_CATALOG_COUNT: &str = "catalog.catalog_count";
pub(crate) const METRIC_CATALOG_MANAGER_SCHEMA_COUNT: &str = "catalog.schema_count";
pub(crate) const METRIC_CATALOG_MANAGER_TABLE_COUNT: &str = "catalog.table_count";
use lazy_static::lazy_static;
use prometheus::*;
pub(crate) const METRIC_CATALOG_KV_REMOTE_GET: &str = "catalog.kv.get.remote";
pub(crate) const METRIC_CATALOG_KV_GET: &str = "catalog.kv.get";
#[inline]
pub(crate) fn db_label(catalog: &str, schema: &str) -> (&'static str, String) {
(METRIC_DB_LABEL, build_db_string(catalog, schema))
lazy_static! {
pub static ref METRIC_CATALOG_MANAGER_CATALOG_COUNT: IntGauge =
register_int_gauge!("catalog_catalog_count", "catalog catalog count").unwrap();
pub static ref METRIC_CATALOG_MANAGER_SCHEMA_COUNT: IntGauge =
register_int_gauge!("catalog_schema_count", "catalog schema count").unwrap();
pub static ref METRIC_CATALOG_MANAGER_TABLE_COUNT: IntGaugeVec = register_int_gauge_vec!(
"catalog_table_count",
"catalog table count",
&[METRIC_DB_LABEL]
)
.unwrap();
pub static ref METRIC_CATALOG_KV_REMOTE_GET: Histogram =
register_histogram!("catalog_kv_get_remote", "catalog kv get remote").unwrap();
pub static ref METRIC_CATALOG_KV_GET: Histogram =
register_histogram!("catalog_kv_get", "catalog kv get").unwrap();
}


@@ -27,8 +27,10 @@ datatypes = { workspace = true }
derive_builder.workspace = true
enum_dispatch = "0.3"
futures-util.workspace = true
lazy_static.workspace = true
moka = { workspace = true, features = ["future"] }
parking_lot = "0.12"
prometheus.workspace = true
prost.workspace = true
rand.workspace = true
session = { workspace = true }
@@ -41,11 +43,10 @@ tonic.workspace = true
common-grpc-expr = { workspace = true }
datanode = { workspace = true }
derive-new = "0.5"
prost.workspace = true
substrait = { workspace = true }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
[dev-dependencies.substrait_proto]
package = "substrait"
version = "0.7"
version = "0.17"


@@ -28,7 +28,7 @@ use common_grpc::flight::{FlightDecoder, FlightMessage};
use common_query::Output;
use common_recordbatch::error::ExternalSnafu;
use common_recordbatch::RecordBatchStreamAdaptor;
use common_telemetry::{logging, timer};
use common_telemetry::logging;
use futures_util::StreamExt;
use prost::Message;
use snafu::{ensure, ResultExt};
@@ -111,12 +111,12 @@ impl Database {
}
pub async fn insert(&self, requests: InsertRequests) -> Result<u32> {
let _timer = timer!(metrics::METRIC_GRPC_INSERT);
let _timer = metrics::METRIC_GRPC_INSERT.start_timer();
self.handle(Request::Inserts(requests)).await
}
pub async fn row_insert(&self, requests: RowInsertRequests) -> Result<u32> {
let _timer = timer!(metrics::METRIC_GRPC_INSERT);
let _timer = metrics::METRIC_GRPC_INSERT.start_timer();
self.handle(Request::RowInserts(requests)).await
}
@@ -141,7 +141,7 @@ impl Database {
}
pub async fn delete(&self, request: DeleteRequests) -> Result<u32> {
let _timer = timer!(metrics::METRIC_GRPC_DELETE);
let _timer = metrics::METRIC_GRPC_DELETE.start_timer();
self.handle(Request::Deletes(request)).await
}
@@ -171,7 +171,7 @@ impl Database {
where
S: AsRef<str>,
{
let _timer = timer!(metrics::METRIC_GRPC_SQL);
let _timer = metrics::METRIC_GRPC_SQL.start_timer();
self.do_get(
Request::Query(QueryRequest {
query: Some(Query::Sql(sql.as_ref().to_string())),
@@ -182,7 +182,7 @@ impl Database {
}
pub async fn logical_plan(&self, logical_plan: Vec<u8>, trace_id: u64) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_LOGICAL_PLAN);
let _timer = metrics::METRIC_GRPC_LOGICAL_PLAN.start_timer();
self.do_get(
Request::Query(QueryRequest {
query: Some(Query::LogicalPlan(logical_plan)),
@@ -199,7 +199,7 @@ impl Database {
end: &str,
step: &str,
) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_PROMQL_RANGE_QUERY);
let _timer = metrics::METRIC_GRPC_PROMQL_RANGE_QUERY.start_timer();
self.do_get(
Request::Query(QueryRequest {
query: Some(Query::PromRangeQuery(PromRangeQuery {
@@ -215,7 +215,7 @@ impl Database {
}
pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_CREATE_TABLE);
let _timer = metrics::METRIC_GRPC_CREATE_TABLE.start_timer();
self.do_get(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateTable(expr)),
@@ -226,7 +226,7 @@ impl Database {
}
pub async fn alter(&self, expr: AlterExpr) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_ALTER);
let _timer = metrics::METRIC_GRPC_ALTER.start_timer();
self.do_get(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::Alter(expr)),
@@ -237,7 +237,7 @@ impl Database {
}
pub async fn drop_table(&self, expr: DropTableExpr) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_DROP_TABLE);
let _timer = metrics::METRIC_GRPC_DROP_TABLE.start_timer();
self.do_get(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::DropTable(expr)),
@@ -248,7 +248,7 @@ impl Database {
}
pub async fn truncate_table(&self, expr: TruncateTableExpr) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_TRUNCATE_TABLE);
let _timer = metrics::METRIC_GRPC_TRUNCATE_TABLE.start_timer();
self.do_get(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::TruncateTable(expr)),
@@ -260,7 +260,7 @@ impl Database {
async fn do_get(&self, request: Request, trace_id: u64) -> Result<Output> {
// FIXME(paomian): should be added some labels for metrics
let _timer = timer!(metrics::METRIC_GRPC_DO_GET);
let _timer = metrics::METRIC_GRPC_DO_GET.start_timer();
let request = self.to_rpc_request(request, trace_id);
let request = Ticket {
ticket: request.encode_to_vec().into(),


@@ -12,15 +12,34 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! client metrics
pub const METRIC_GRPC_CREATE_TABLE: &str = "grpc.create_table";
pub const METRIC_GRPC_PROMQL_RANGE_QUERY: &str = "grpc.promql.range_query";
pub const METRIC_GRPC_INSERT: &str = "grpc.insert";
pub const METRIC_GRPC_DELETE: &str = "grpc.delete";
pub const METRIC_GRPC_SQL: &str = "grpc.sql";
pub const METRIC_GRPC_LOGICAL_PLAN: &str = "grpc.logical_plan";
pub const METRIC_GRPC_ALTER: &str = "grpc.alter";
pub const METRIC_GRPC_DROP_TABLE: &str = "grpc.drop_table";
pub const METRIC_GRPC_TRUNCATE_TABLE: &str = "grpc.truncate_table";
pub const METRIC_GRPC_DO_GET: &str = "grpc.do_get";
pub(crate) const METRIC_REGION_REQUEST_GRPC: &str = "grpc.region_request";
use lazy_static::lazy_static;
use prometheus::*;
lazy_static! {
pub static ref METRIC_GRPC_CREATE_TABLE: Histogram =
register_histogram!("grpc_create_table", "grpc create table").unwrap();
pub static ref METRIC_GRPC_PROMQL_RANGE_QUERY: Histogram =
register_histogram!("grpc_promql_range_query", "grpc promql range query").unwrap();
pub static ref METRIC_GRPC_INSERT: Histogram =
register_histogram!("grpc_insert", "grpc insert").unwrap();
pub static ref METRIC_GRPC_DELETE: Histogram =
register_histogram!("grpc_delete", "grpc delete").unwrap();
pub static ref METRIC_GRPC_SQL: Histogram =
register_histogram!("grpc_sql", "grpc sql").unwrap();
pub static ref METRIC_GRPC_LOGICAL_PLAN: Histogram =
register_histogram!("grpc_logical_plan", "grpc logical plan").unwrap();
pub static ref METRIC_GRPC_ALTER: Histogram =
register_histogram!("grpc_alter", "grpc alter").unwrap();
pub static ref METRIC_GRPC_DROP_TABLE: Histogram =
register_histogram!("grpc_drop_table", "grpc drop table").unwrap();
pub static ref METRIC_GRPC_TRUNCATE_TABLE: Histogram =
register_histogram!("grpc_truncate_table", "grpc truncate table").unwrap();
pub static ref METRIC_GRPC_DO_GET: Histogram =
register_histogram!("grpc_do_get", "grpc do get").unwrap();
pub static ref METRIC_REGION_REQUEST_GRPC: HistogramVec = register_histogram_vec!(
"grpc_region_request",
"grpc region request",
&["request_type"]
)
.unwrap();
}


@@ -24,7 +24,7 @@ use common_meta::datanode_manager::{AffectedRows, Datanode};
use common_meta::error::{self as meta_error, Result as MetaResult};
use common_recordbatch::error::ExternalSnafu;
use common_recordbatch::{RecordBatchStreamAdaptor, SendableRecordBatchStream};
use common_telemetry::{error, timer};
use common_telemetry::error;
use prost::Message;
use snafu::{location, Location, OptionExt, ResultExt};
use tokio_stream::StreamExt;
@@ -152,11 +152,9 @@ impl RegionRequester {
.with_context(|| MissingFieldSnafu { field: "body" })?
.as_ref()
.to_string();
let _timer = timer!(
metrics::METRIC_REGION_REQUEST_GRPC,
&[("request_type", request_type)]
);
let _timer = metrics::METRIC_REGION_REQUEST_GRPC
.with_label_values(&[request_type.as_str()])
.start_timer();
let mut client = self.client.raw_region_client()?;


@@ -10,9 +10,7 @@ name = "greptime"
path = "src/bin/greptime.rs"
[features]
default = ["metrics-process"]
tokio-console = ["common-telemetry/tokio-console"]
metrics-process = ["servers/metrics-process"]
[dependencies]
anymap = "1.0.0-beta.2"
@@ -45,11 +43,11 @@ futures.workspace = true
lazy_static.workspace = true
meta-client = { workspace = true }
meta-srv = { workspace = true }
metrics.workspace = true
mito2 = { workspace = true }
nu-ansi-term = "0.46"
partition = { workspace = true }
plugins.workspace = true
prometheus.workspace = true
prost.workspace = true
query = { workspace = true }
rand.workspace = true
@@ -63,6 +61,7 @@ snafu.workspace = true
substrait = { workspace = true }
table = { workspace = true }
tokio.workspace = true
toml.workspace = true
[target.'cfg(not(windows))'.dependencies]
tikv-jemallocator = "0.5"
@@ -71,7 +70,6 @@ tikv-jemallocator = "0.5"
common-test-util = { workspace = true }
serde.workspace = true
temp-env = "0.3"
toml.workspace = true
[target.'cfg(not(windows))'.dev-dependencies]
rexpect = "0.5"


@@ -21,7 +21,11 @@ use cmd::error::Result;
use cmd::options::{Options, TopLevelOptions};
use cmd::{cli, datanode, frontend, metasrv, standalone};
use common_telemetry::logging::{error, info, TracingOptions};
use metrics::gauge;
lazy_static::lazy_static! {
static ref APP_VERSION: prometheus::IntGaugeVec =
prometheus::register_int_gauge_vec!("app_version", "app version", &["short_version", "version"]).unwrap();
}
#[derive(Parser)]
#[clap(name = "greptimedb", version = print_version())]
@@ -204,11 +208,12 @@ async fn main() -> Result<()> {
};
common_telemetry::set_panic_hook();
common_telemetry::init_default_metrics_recorder();
let _guard = common_telemetry::init_global_logging(app_name, logging_opts, tracing_opts);
// Report app version as gauge.
gauge!("app_version", 1.0, "short_version" => short_version(), "version" => full_version());
APP_VERSION
.with_label_values(&[short_version(), full_version()])
.inc();
// Log version and argument flags.
info!(


@@ -20,14 +20,13 @@ use std::time::Duration;
use async_trait::async_trait;
use clap::Parser;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::etcd::EtcdStore;
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute};
use common_meta::table_name::TableName;
use common_telemetry::info;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema};
use meta_srv::service::store::etcd::EtcdStore;
use meta_srv::service::store::kv::KvBackendAdapter;
use rand::Rng;
use table::metadata::{RawTableInfo, RawTableMeta, TableId, TableIdent, TableType};
@@ -64,9 +63,7 @@ impl BenchTableMetadataCommand {
pub async fn build(&self) -> Result<Instance> {
let etcd_store = EtcdStore::with_endpoints([&self.etcd_addr]).await.unwrap();
let table_metadata_manager = Arc::new(TableMetadataManager::new(KvBackendAdapter::wrap(
etcd_store,
)));
let table_metadata_manager = Arc::new(TableMetadataManager::new(etcd_store));
let tool = BenchTableMetadata {
table_metadata_manager,
@@ -157,6 +154,7 @@ fn create_region_routes() -> Vec<RegionRoute> {
addr: String::new(),
}),
follower_peers: vec![],
leader_status: None,
});
}


@@ -17,6 +17,8 @@ use std::sync::Arc;
use async_trait::async_trait;
use clap::{Parser, ValueEnum};
use client::api::v1::auth_header::AuthScheme;
use client::api::v1::Basic;
use client::{Client, Database, DEFAULT_SCHEMA_NAME};
use common_query::Output;
use common_recordbatch::util::collect;
@@ -25,13 +27,14 @@ use datatypes::scalars::ScalarVector;
use datatypes::vectors::{StringVector, Vector};
use snafu::{OptionExt, ResultExt};
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use tokio::io::{AsyncWriteExt, BufWriter};
use tokio::sync::Semaphore;
use crate::cli::{Instance, Tool};
use crate::error::{
CollectRecordBatchesSnafu, ConnectServerSnafu, EmptyResultSnafu, Error, FileIoSnafu,
InvalidDatabaseNameSnafu, NotDataFromOutputSnafu, RequestDatabaseSnafu, Result,
IllegalConfigSnafu, InvalidDatabaseNameSnafu, NotDataFromOutputSnafu, RequestDatabaseSnafu,
Result,
};
type TableReference = (String, String, String);
@@ -70,6 +73,10 @@ pub struct ExportCommand {
/// Things to export
#[clap(long, short = 't', value_enum)]
target: ExportTarget,
/// basic authentication for connecting to the server
#[clap(long)]
auth_basic: Option<String>,
}
impl ExportCommand {
@@ -82,12 +89,22 @@ impl ExportCommand {
addr: self.addr.clone(),
})?;
let (catalog, schema) = split_database(&self.database)?;
let database_client = Database::new(
let mut database_client = Database::new(
catalog.clone(),
schema.clone().unwrap_or(DEFAULT_SCHEMA_NAME.to_string()),
client,
);
if let Some(auth_basic) = &self.auth_basic {
let (username, password) = auth_basic.split_once(':').context(IllegalConfigSnafu {
msg: "auth_basic cannot be split by ':'".to_string(),
})?;
database_client.set_auth(AuthScheme::Basic(Basic {
username: username.to_string(),
password: password.to_string(),
}));
}
Ok(Instance::Tool(Box::new(Export {
client: database_client,
catalog,
@@ -141,6 +158,9 @@ impl Export {
let mut result = Vec::with_capacity(schemas.len());
for i in 0..schemas.len() {
let schema = schemas.get_data(i).unwrap().to_owned();
if schema == common_catalog::consts::INFORMATION_SCHEMA_NAME {
continue;
}
result.push((self.catalog.clone(), schema));
}
Ok(result)
@@ -326,25 +346,30 @@ impl Export {
let copy_from_file =
Path::new(&self.output_dir).join(format!("{catalog}-{schema}_copy_from.sql"));
let mut file = File::create(copy_from_file).await.context(FileIoSnafu)?;
let mut writer =
BufWriter::new(File::create(copy_from_file).await.context(FileIoSnafu)?);
let copy_from_sql = dir_filenames
.into_iter()
.map(|file| {
let file = file.unwrap();
let filename = file.file_name().into_string().unwrap();
for table_file in dir_filenames {
let table_file = table_file.unwrap();
let table_name = table_file
.file_name()
.into_string()
.unwrap()
.replace(".parquet", "");
format!(
"copy {} from '{}' with (format='parquet');\n",
filename.replace(".parquet", ""),
file.path().to_str().unwrap()
writer
.write(
format!(
"copy {} from '{}' with (format='parquet');\n",
table_name,
table_file.path().to_str().unwrap()
)
.as_bytes(),
)
})
.collect::<Vec<_>>()
.join("");
file.write_all(copy_from_sql.as_bytes())
.await
.context(FileIoSnafu)?;
.await
.context(FileIoSnafu)?;
}
writer.flush().await.context(FileIoSnafu)?;
info!("finished exporting {catalog}.{schema} copy_from.sql");

View File

@@ -27,6 +27,8 @@ use common_meta::key::table_name::{TableNameKey, TableNameValue};
use common_meta::key::table_region::{TableRegionKey, TableRegionValue};
use common_meta::key::table_route::{TableRouteKey, TableRouteValue as NextTableRouteValue};
use common_meta::key::{RegionDistribution, TableMetaKey};
use common_meta::kv_backend::etcd::EtcdStore;
use common_meta::kv_backend::KvBackendRef;
use common_meta::range_stream::PaginationStream;
use common_meta::rpc::router::TableRoute;
use common_meta::rpc::store::{BatchDeleteRequest, BatchPutRequest, PutRequest, RangeRequest};
@@ -35,8 +37,6 @@ use common_meta::util::get_prefix_end_key;
use common_telemetry::info;
use etcd_client::Client;
use futures::TryStreamExt;
use meta_srv::service::store::etcd::EtcdStore;
use meta_srv::service::store::kv::{KvBackendAdapter, KvStoreRef};
use prost::Message;
use snafu::ResultExt;
use v1_helper::{CatalogKey as v1CatalogKey, SchemaKey as v1SchemaKey, TableGlobalValue};
@@ -81,7 +81,7 @@ impl UpgradeCommand {
}
struct MigrateTableMetadata {
etcd_store: KvStoreRef,
etcd_store: KvBackendRef,
dryrun: bool,
skip_table_global_keys: bool,
@@ -123,7 +123,7 @@ impl MigrateTableMetadata {
info!("Start scanning key from: {}", String::from_utf8_lossy(&key));
let mut stream = PaginationStream::new(
KvBackendAdapter::wrap(self.etcd_store.clone()),
self.etcd_store.clone(),
RangeRequest::new().with_range(key, range_end),
PAGE_SIZE,
Arc::new(|kv: KeyValue| {
@@ -182,7 +182,7 @@ impl MigrateTableMetadata {
let mut keys = Vec::new();
info!("Start scanning key from: {}", String::from_utf8_lossy(&key));
let mut stream = PaginationStream::new(
KvBackendAdapter::wrap(self.etcd_store.clone()),
self.etcd_store.clone(),
RangeRequest::new().with_range(key, range_end),
PAGE_SIZE,
Arc::new(|kv: KeyValue| {
@@ -234,7 +234,7 @@ impl MigrateTableMetadata {
let mut keys = Vec::new();
info!("Start scanning key from: {}", String::from_utf8_lossy(&key));
let mut stream = PaginationStream::new(
KvBackendAdapter::wrap(self.etcd_store.clone()),
self.etcd_store.clone(),
RangeRequest::new().with_range(key, range_end),
PAGE_SIZE,
Arc::new(|kv: KeyValue| {
@@ -284,7 +284,7 @@ impl MigrateTableMetadata {
info!("Start scanning key from: {}", String::from_utf8_lossy(&key));
let mut stream = PaginationStream::new(
KvBackendAdapter::wrap(self.etcd_store.clone()),
self.etcd_store.clone(),
RangeRequest::new().with_range(key, range_end.clone()),
PAGE_SIZE,
Arc::new(|kv: KeyValue| {

View File

@@ -188,6 +188,7 @@ impl StartCommand {
}
async fn build(self, mut opts: FrontendOptions) -> Result<Instance> {
#[allow(clippy::unnecessary_mut_passed)]
let plugins = plugins::setup_frontend_plugins(&mut opts)
.await
.context(StartFrontendSnafu)?;
@@ -200,7 +201,7 @@ impl StartCommand {
.context(StartFrontendSnafu)?;
instance
.build_servers(&opts)
.build_servers(opts)
.await
.context(StartFrontendSnafu)?;
@@ -312,6 +313,7 @@ mod tests {
..Default::default()
};
#[allow(clippy::unnecessary_mut_passed)]
let plugins = plugins::setup_frontend_plugins(&mut fe_opts).await.unwrap();
let provider = plugins.get::<UserProviderRef>().unwrap();

View File

@@ -12,11 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_config::KvStoreConfig;
use common_config::KvBackendConfig;
use common_telemetry::logging::LoggingOptions;
use config::{Config, Environment, File, FileFormat};
use datanode::config::{DatanodeOptions, ProcedureConfig};
use frontend::frontend::FrontendOptions;
use frontend::error::{Result as FeResult, TomlFormatSnafu};
use frontend::frontend::{FrontendOptions, TomlSerializable};
use meta_srv::metasrv::MetaSrvOptions;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
@@ -27,15 +28,28 @@ pub const ENV_VAR_SEP: &str = "__";
pub const ENV_LIST_SEP: &str = ",";
/// Options mixed up from datanode, frontend and metasrv.
#[derive(Serialize)]
pub struct MixOptions {
pub data_home: String,
pub procedure: ProcedureConfig,
pub metadata_store: KvStoreConfig,
pub metadata_store: KvBackendConfig,
pub frontend: FrontendOptions,
pub datanode: DatanodeOptions,
pub logging: LoggingOptions,
}
impl From<MixOptions> for FrontendOptions {
fn from(value: MixOptions) -> Self {
value.frontend
}
}
impl TomlSerializable for MixOptions {
fn to_toml(&self) -> FeResult<String> {
toml::to_string(self).context(TomlFormatSnafu)
}
}
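A small illustration of the `to_toml` path above, serializing an options struct with serde and the `toml` crate; `DemoOptions` is a stand-in for the real `MixOptions`, used only to keep the sketch self-contained.

use serde::Serialize;

#[derive(Serialize)]
struct DemoOptions {
    data_home: String,
    http_addr: String,
}

fn main() {
    // Mirrors `TomlSerializable::to_toml`: serde + `toml::to_string`.
    let opts = DemoOptions {
        data_home: "/tmp/greptimedb".to_string(),
        http_addr: "127.0.0.1:4000".to_string(),
    };
    let text = toml::to_string(&opts).expect("serialize options to TOML");
    println!("{text}");
}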
pub enum Options {
Datanode(Box<DatanodeOptions>),
Frontend(Box<FrontendOptions>),

View File

@@ -19,7 +19,7 @@ use catalog::kvbackend::KvBackendCatalogManager;
use catalog::CatalogManagerRef;
use clap::Parser;
use common_base::Plugins;
use common_config::{metadata_store_dir, KvStoreConfig, WalConfig};
use common_config::{metadata_store_dir, KvBackendConfig, WalConfig};
use common_meta::cache_invalidator::DummyKvCacheInvalidator;
use common_meta::kv_backend::KvBackendRef;
use common_procedure::ProcedureManagerRef;
@@ -97,7 +97,7 @@ pub struct StandaloneOptions {
pub prom_store: PromStoreOptions,
pub wal: WalConfig,
pub storage: StorageConfig,
pub metadata_store: KvStoreConfig,
pub metadata_store: KvBackendConfig,
pub procedure: ProcedureConfig,
pub logging: LoggingOptions,
pub user_provider: Option<String>,
@@ -119,7 +119,7 @@ impl Default for StandaloneOptions {
prom_store: PromStoreOptions::default(),
wal: WalConfig::default(),
storage: StorageConfig::default(),
metadata_store: KvStoreConfig::default(),
metadata_store: KvBackendConfig::default(),
procedure: ProcedureConfig::default(),
logging: LoggingOptions::default(),
user_provider: None,
@@ -316,12 +316,13 @@ impl StartCommand {
#[allow(unused_variables)]
#[allow(clippy::diverging_sub_expression)]
async fn build(self, opts: MixOptions) -> Result<Instance> {
let mut fe_opts = opts.frontend;
let fe_plugins = plugins::setup_frontend_plugins(&mut fe_opts)
#[allow(clippy::unnecessary_mut_passed)]
let fe_opts = opts.frontend.clone();
let fe_plugins = plugins::setup_frontend_plugins(&fe_opts)
.await
.context(StartFrontendSnafu)?;
let dn_opts = opts.datanode;
let dn_opts = opts.datanode.clone();
info!("Standalone start command: {:#?}", self);
info!(
@@ -335,23 +336,26 @@ impl StartCommand {
})?;
let metadata_dir = metadata_store_dir(&opts.data_home);
let (kv_store, procedure_manager) = FeInstance::try_build_standalone_components(
let (kv_backend, procedure_manager) = FeInstance::try_build_standalone_components(
metadata_dir,
opts.metadata_store,
opts.procedure,
opts.metadata_store.clone(),
opts.procedure.clone(),
)
.await
.context(StartFrontendSnafu)?;
let datanode =
DatanodeBuilder::new(dn_opts.clone(), Some(kv_store.clone()), Default::default())
.build()
.await
.context(StartDatanodeSnafu)?;
let datanode = DatanodeBuilder::new(
dn_opts.clone(),
Some(kv_backend.clone()),
Default::default(),
)
.build()
.await
.context(StartDatanodeSnafu)?;
let region_server = datanode.region_server();
let catalog_manager = KvBackendCatalogManager::new(
kv_store.clone(),
kv_backend.clone(),
Arc::new(DummyKvCacheInvalidator),
Arc::new(StandaloneDatanodeManager(region_server.clone())),
);
@@ -365,7 +369,7 @@ impl StartCommand {
// TODO: build frontend instance like in distributed mode
let mut frontend = build_frontend(
fe_plugins,
kv_store,
kv_backend,
procedure_manager.clone(),
catalog_manager,
region_server,
@@ -373,7 +377,7 @@ impl StartCommand {
.await?;
frontend
.build_servers(&fe_opts)
.build_servers(opts)
.await
.context(StartFrontendSnafu)?;
@@ -388,13 +392,13 @@ impl StartCommand {
/// Build frontend instance in standalone mode
async fn build_frontend(
plugins: Plugins,
kv_store: KvBackendRef,
kv_backend: KvBackendRef,
procedure_manager: ProcedureManagerRef,
catalog_manager: CatalogManagerRef,
region_server: RegionServer,
) -> Result<FeInstance> {
let frontend_instance = FeInstance::try_new_standalone(
kv_store,
kv_backend,
procedure_manager,
catalog_manager,
plugins,
@@ -426,6 +430,7 @@ mod tests {
..Default::default()
};
#[allow(clippy::unnecessary_mut_passed)]
let plugins = plugins::setup_frontend_plugins(&mut fe_opts).await.unwrap();
let provider = plugins.get::<UserProviderRef>().unwrap();

View File

@@ -54,14 +54,14 @@ pub fn metadata_store_dir(store_dir: &str) -> String {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct KvStoreConfig {
pub struct KvBackendConfig {
// Kv file size in bytes
pub file_size: ReadableSize,
// Kv purge threshold in bytes
pub purge_threshold: ReadableSize,
}
impl Default for KvStoreConfig {
impl Default for KvBackendConfig {
fn default() -> Self {
Self {
// log file size 256MB

View File

@@ -0,0 +1,15 @@
[package]
name = "decimal"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
arrow.workspace = true
bigdecimal = { workspace = true }
common-error = { workspace = true }
common-macro = { workspace = true }
rust_decimal = { workspace = true }
serde.workspace = true
serde_json = "1.0"
snafu.workspace = true

View File

@@ -0,0 +1,394 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::Display;
use std::hash::Hash;
use std::str::FromStr;
use bigdecimal::{BigDecimal, ToPrimitive};
use rust_decimal::Decimal as RustDecimal;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use crate::error::{
self, BigDecimalOutOfRangeSnafu, Error, InvalidPrecisionOrScaleSnafu, ParseBigDecimalStrSnafu,
ParseRustDecimalStrSnafu,
};
/// The maximum precision for [Decimal128] values
pub const DECIMAL128_MAX_PRECISION: u8 = 38;
/// The maximum scale for [Decimal128] values
pub const DECIMAL128_MAX_SCALE: i8 = 38;
/// The default scale for [Decimal128] values
pub const DECIMAL128_DEFAULT_SCALE: i8 = 10;
/// The maximum byte length that a RustDecimal can represent accurately
const BYTES_TO_OVERFLOW_RUST_DECIMAL: usize = 28;
/// 128bit decimal, using the i128 to represent the decimal.
///
/// **precision**: the total number of digits in the number; its range is \[1, 38\].
///
/// **scale**: the number of digits to the right of the decimal point; its range is \[0, precision\].
#[derive(Debug, Default, Eq, Copy, Clone, Serialize, Deserialize)]
pub struct Decimal128 {
value: i128,
precision: u8,
scale: i8,
}
impl Decimal128 {
/// Create a new Decimal128 from i128, precision and scale.
pub fn new_unchecked(value: i128, precision: u8, scale: i8) -> Self {
Self {
value,
precision,
scale,
}
}
pub fn try_new(value: i128, precision: u8, scale: i8) -> error::Result<Self> {
// Make sure the precision and scale are valid.
valid_precision_and_scale(precision, scale)?;
Ok(Self {
value,
precision,
scale,
})
}
pub fn val(&self) -> i128 {
self.value
}
/// Returns the precision of this decimal.
pub fn precision(&self) -> u8 {
self.precision
}
/// Returns the scale of this decimal.
pub fn scale(&self) -> i8 {
self.scale
}
/// Convert to ScalarValue
pub fn to_scalar_value(&self) -> (Option<i128>, u8, i8) {
(Some(self.value), self.precision, self.scale)
}
}
impl PartialEq for Decimal128 {
fn eq(&self, other: &Self) -> bool {
self.precision.eq(&other.precision)
&& self.scale.eq(&other.scale)
&& self.value.eq(&other.value)
}
}
// Two decimal values can be compared if they have the same precision and scale.
impl PartialOrd for Decimal128 {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
if self.precision == other.precision && self.scale == other.scale {
return self.value.partial_cmp(&other.value);
}
None
}
}
/// Convert from string to Decimal128.
/// If the string is longer than 28 bytes, RustDecimal cannot hold the value accurately;
/// in that case, use BigDecimal to get an accurate result.
impl FromStr for Decimal128 {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let len = s.as_bytes().len();
if len <= BYTES_TO_OVERFLOW_RUST_DECIMAL {
let rd = RustDecimal::from_str_exact(s).context(ParseRustDecimalStrSnafu { raw: s })?;
Ok(Self::from(rd))
} else {
let bd = BigDecimal::from_str(s).context(ParseBigDecimalStrSnafu { raw: s })?;
Self::try_from(bd)
}
}
}
impl Display for Decimal128 {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{}",
format_decimal_str(&self.value.to_string(), self.precision as usize, self.scale)
)
}
}
impl Hash for Decimal128 {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
state.write_i128(self.value);
state.write_u8(self.precision);
state.write_i8(self.scale);
}
}
impl From<Decimal128> for serde_json::Value {
fn from(decimal: Decimal128) -> Self {
serde_json::Value::String(decimal.to_string())
}
}
impl From<Decimal128> for i128 {
fn from(decimal: Decimal128) -> Self {
decimal.val()
}
}
impl From<i128> for Decimal128 {
fn from(value: i128) -> Self {
Self {
value,
precision: DECIMAL128_MAX_PRECISION,
scale: DECIMAL128_DEFAULT_SCALE,
}
}
}
/// Convert from RustDecimal to Decimal128
/// The range RustDecimal can represent is smaller than Decimal128's,
/// so it is safe to convert a RustDecimal to a Decimal128.
impl From<RustDecimal> for Decimal128 {
fn from(rd: RustDecimal) -> Self {
let s = rd.to_string();
let precision = (s.len() - s.matches(&['.', '-'][..]).count()) as u8;
Self {
value: rd.mantissa(),
precision,
scale: rd.scale() as i8,
}
}
}
/// Try from BigDecimal to Decimal128
/// The range that BigDecimal can represent is larger than Decimal128's,
/// so converting a BigDecimal to a Decimal128 may fail;
/// if the BigDecimal is out of range, an error is returned.
impl TryFrom<BigDecimal> for Decimal128 {
type Error = Error;
fn try_from(value: BigDecimal) -> Result<Self, Self::Error> {
let precision = value.digits();
let (big_int, scale) = value.as_bigint_and_exponent();
// convert big_int to i128, if convert failed, return error
big_int
.to_i128()
.map(|val| Self::try_new(val, precision as u8, scale as i8))
.unwrap_or_else(|| BigDecimalOutOfRangeSnafu { value }.fail())
}
}
/// Ported from arrow-rs,
/// see https://github.com/Apache/arrow-rs/blob/master/arrow-array/src/types.rs#L1323-L1344
fn format_decimal_str(value_str: &str, precision: usize, scale: i8) -> String {
let (sign, rest) = match value_str.strip_prefix('-') {
Some(stripped) => ("-", stripped),
None => ("", value_str),
};
let bound = precision.min(rest.len()) + sign.len();
let value_str = &value_str[0..bound];
if scale == 0 {
value_str.to_string()
} else if scale < 0 {
let padding = value_str.len() + scale.unsigned_abs() as usize;
format!("{value_str:0<padding$}")
} else if rest.len() > scale as usize {
// Decimal separator is in the middle of the string
let (whole, decimal) = value_str.split_at(value_str.len() - scale as usize);
format!("{whole}.{decimal}")
} else {
// String has to be padded
format!("{}0.{:0>width$}", sign, rest, width = scale as usize)
}
}
/// check whether precision and scale is valid
fn valid_precision_and_scale(precision: u8, scale: i8) -> error::Result<()> {
if precision == 0 {
return InvalidPrecisionOrScaleSnafu {
reason: format!(
"precision cannot be 0, has to be between [1, {}]",
DECIMAL128_MAX_PRECISION
),
}
.fail();
}
if precision > DECIMAL128_MAX_PRECISION {
return InvalidPrecisionOrScaleSnafu {
reason: format!(
"precision {} is greater than max {}",
precision, DECIMAL128_MAX_PRECISION
),
}
.fail();
}
if scale > DECIMAL128_MAX_SCALE {
return InvalidPrecisionOrScaleSnafu {
reason: format!(
"scale {} is greater than max {}",
scale, DECIMAL128_MAX_SCALE
),
}
.fail();
}
if scale > 0 && scale > precision as i8 {
return InvalidPrecisionOrScaleSnafu {
reason: format!("scale {} is greater than precision {}", scale, precision),
}
.fail();
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_common_decimal128() {
let decimal = Decimal128::new_unchecked(123456789, 9, 3);
assert_eq!(decimal.to_string(), "123456.789");
let decimal = Decimal128::try_new(123456789, 9, 0);
assert_eq!(decimal.unwrap().to_string(), "123456789");
let decimal = Decimal128::try_new(123456789, 9, 2);
assert_eq!(decimal.unwrap().to_string(), "1234567.89");
let decimal = Decimal128::try_new(123, 3, -2);
assert_eq!(decimal.unwrap().to_string(), "12300");
// invalid precision or scale
// precision is 0
let decimal = Decimal128::try_new(123, 0, 0);
assert!(decimal.is_err());
// precision is greater than 38
let decimal = Decimal128::try_new(123, 39, 0);
assert!(decimal.is_err());
// scale is greater than 38
let decimal = Decimal128::try_new(123, 38, 39);
assert!(decimal.is_err());
// scale is greater than precision
let decimal = Decimal128::try_new(123, 3, 4);
assert!(decimal.is_err());
}
#[test]
fn test_decimal128_from_str() {
// 0 < precision <= 28
let decimal = Decimal128::from_str("1234567890.123456789").unwrap();
assert_eq!(decimal.to_string(), "1234567890.123456789");
assert_eq!(decimal.precision(), 19);
assert_eq!(decimal.scale(), 9);
let decimal = Decimal128::from_str("1234567890.123456789012345678").unwrap();
assert_eq!(decimal.to_string(), "1234567890.123456789012345678");
assert_eq!(decimal.precision(), 28);
assert_eq!(decimal.scale(), 18);
// 28 < precision <= 38
let decimal = Decimal128::from_str("1234567890.1234567890123456789012").unwrap();
assert_eq!(decimal.to_string(), "1234567890.1234567890123456789012");
assert_eq!(decimal.precision(), 32);
assert_eq!(decimal.scale(), 22);
let decimal = Decimal128::from_str("1234567890.1234567890123456789012345678").unwrap();
assert_eq!(
decimal.to_string(),
"1234567890.1234567890123456789012345678"
);
assert_eq!(decimal.precision(), 38);
assert_eq!(decimal.scale(), 28);
// precision > 38
let decimal = Decimal128::from_str("1234567890.12345678901234567890123456789");
assert!(decimal.is_err());
}
#[test]
#[ignore]
fn test_parse_decimal128_speed() {
// RustDecimal::from_str: 1.124855167s
for _ in 0..1500000 {
let _ = RustDecimal::from_str("1234567890.123456789012345678999").unwrap();
}
// BigDecimal::try_from: 6.799290042s
for _ in 0..1500000 {
let _ = BigDecimal::from_str("1234567890.123456789012345678999").unwrap();
}
}
#[test]
fn test_decimal128_precision_and_scale() {
// precision and scale from Decimal(1,1) to Decimal(38,38)
for precision in 1..=38 {
for scale in 1..=precision {
let decimal_str = format!("0.{}", "1".repeat(scale as usize));
let decimal = Decimal128::from_str(&decimal_str).unwrap();
assert_eq!(decimal_str, decimal.to_string());
}
}
}
#[test]
fn test_decimal128_compare() {
// the same precision and scale
let decimal1 = Decimal128::from_str("1234567890.123456789012345678999").unwrap();
let decimal2 = Decimal128::from_str("1234567890.123456789012345678999").unwrap();
assert!(decimal1 == decimal2);
let decimal1 = Decimal128::from_str("1234567890.123456789012345678999").unwrap();
let decimal2 = Decimal128::from_str("1234567890.123456789012345678998").unwrap();
assert!(decimal1 > decimal2);
let decimal1 = Decimal128::from_str("1234567890.123456789012345678999").unwrap();
let decimal2 = Decimal128::from_str("1234567890.123456789012345678998").unwrap();
assert!(decimal2 < decimal1);
let decimal1 = Decimal128::from_str("1234567890.123456789012345678999").unwrap();
let decimal2 = Decimal128::from_str("1234567890.123456789012345678998").unwrap();
assert!(decimal1 >= decimal2);
let decimal1 = Decimal128::from_str("1234567890.123456789012345678999").unwrap();
let decimal2 = Decimal128::from_str("1234567890.123456789012345678998").unwrap();
assert!(decimal2 <= decimal1);
let decimal1 = Decimal128::from_str("1234567890.123456789012345678999").unwrap();
let decimal2 = Decimal128::from_str("1234567890.123456789012345678998").unwrap();
assert!(decimal1 != decimal2);
// different precision and scale cmp is None
let decimal1 = Decimal128::from_str("1234567890.123456789012345678999").unwrap();
let decimal2 = Decimal128::from_str("1234567890.123").unwrap();
assert_eq!(decimal1.partial_cmp(&decimal2), None);
}
}
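A brief usage sketch of the new type, grounded in the API above and assuming it runs inside this crate so `Decimal128` and its `Error` are in scope; the literals are arbitrary.

use std::str::FromStr;

fn demo() -> Result<(), Error> {
    // value 123456789 with precision 9 and scale 3 renders as "123456.789".
    let d = Decimal128::new_unchecked(123456789, 9, 3);
    assert_eq!(d.to_string(), "123456.789");

    // Parsing infers precision and scale from the literal.
    let a = Decimal128::from_str("10.500")?;
    let b = Decimal128::from_str("10.499")?;
    assert_eq!((a.precision(), a.scale()), (5, 3));

    // Ordering is only defined for matching precision and scale.
    assert!(a > b);
    Ok(())
}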

View File

@@ -0,0 +1,72 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use bigdecimal::BigDecimal;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use snafu::{Location, Snafu};
#[derive(Snafu)]
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display("Decimal out of range, decimal value: {}", value))]
BigDecimalOutOfRange {
value: BigDecimal,
location: Location,
},
#[snafu(display("Failed to parse string to rust decimal, raw: {}", raw))]
ParseRustDecimalStr {
raw: String,
#[snafu(source)]
error: rust_decimal::Error,
},
#[snafu(display("Failed to parse string to big decimal, raw: {}", raw))]
ParseBigDecimalStr {
raw: String,
#[snafu(source)]
error: bigdecimal::ParseBigDecimalError,
},
#[snafu(display("Invalid precision or scale, resion: {}", reason))]
InvalidPrecisionOrScale { reason: String, location: Location },
}
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::BigDecimalOutOfRange { .. } => StatusCode::Internal,
Error::ParseRustDecimalStr { .. }
| Error::InvalidPrecisionOrScale { .. }
| Error::ParseBigDecimalStr { .. } => StatusCode::InvalidArguments,
}
}
fn location_opt(&self) -> Option<common_error::snafu::Location> {
match self {
Error::BigDecimalOutOfRange { location, .. } => Some(*location),
Error::InvalidPrecisionOrScale { location, .. } => Some(*location),
Error::ParseRustDecimalStr { .. } | Error::ParseBigDecimalStr { .. } => None,
}
}
fn as_any(&self) -> &dyn std::any::Any {
self
}
}
pub type Result<T> = std::result::Result<T, Error>;

View File

@@ -12,28 +12,5 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_meta::rpc::KeyValue;
pub struct KvPair<'a>(&'a etcd_client::KeyValue);
impl<'a> KvPair<'a> {
/// Creates a `KvPair` from etcd KeyValue
#[inline]
pub fn new(kv: &'a etcd_client::KeyValue) -> Self {
Self(kv)
}
#[inline]
pub fn from_etcd_kv(kv: &etcd_client::KeyValue) -> KeyValue {
KeyValue::from(KvPair::new(kv))
}
}
impl<'a> From<KvPair<'a>> for KeyValue {
fn from(kv: KvPair<'a>) -> Self {
Self {
key: kv.0.key().to_vec(),
value: kv.0.value().to_vec(),
}
}
}
pub mod decimal128;
pub mod error;

View File

@@ -40,7 +40,7 @@ pub trait ErrorExt: StackError {
Self: Sized,
{
match self.status_code() {
StatusCode::Unknown | StatusCode::Unexpected | StatusCode::Internal => {
StatusCode::Unknown | StatusCode::Internal => {
// masks internal error from end user
format!("Internal error: {}", self.status_code() as u32)
}

View File

@@ -14,10 +14,10 @@
use std::fmt;
use strum::EnumString;
use strum::{AsRefStr, EnumString};
/// Common status code for public API.
#[derive(Debug, Clone, Copy, PartialEq, Eq, EnumString)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, EnumString, AsRefStr)]
pub enum StatusCode {
// ====== Begin of common status code ==============
/// Success.

View File

@@ -16,7 +16,8 @@ use std::fmt;
use common_query::error::{self, Result};
use common_query::prelude::{Signature, Volatility};
use datatypes::arrow::compute::kernels::{arithmetic, cast};
use datafusion::arrow::compute::kernels::numeric;
use datatypes::arrow::compute::kernels::cast;
use datatypes::arrow::datatypes::DataType;
use datatypes::prelude::*;
use datatypes::vectors::{Helper, VectorRef};
@@ -51,11 +52,11 @@ impl Function for RateFunction {
let val = &columns[0].to_arrow_array();
let val_0 = val.slice(0, val.len() - 1);
let val_1 = val.slice(1, val.len() - 1);
let dv = arithmetic::subtract_dyn(&val_1, &val_0).context(error::ArrowComputeSnafu)?;
let dv = numeric::sub(&val_1, &val_0).context(error::ArrowComputeSnafu)?;
let ts = &columns[1].to_arrow_array();
let ts_0 = ts.slice(0, ts.len() - 1);
let ts_1 = ts.slice(1, ts.len() - 1);
let dt = arithmetic::subtract_dyn(&ts_1, &ts_0).context(error::ArrowComputeSnafu)?;
let dt = numeric::sub(&ts_1, &ts_0).context(error::ArrowComputeSnafu)?;
let dv = cast::cast(&dv, &DataType::Float64).context(error::TypeCastSnafu {
typ: DataType::Float64,
@@ -63,7 +64,7 @@ impl Function for RateFunction {
let dt = cast::cast(&dt, &DataType::Float64).context(error::TypeCastSnafu {
typ: DataType::Float64,
})?;
let rate = arithmetic::divide_dyn(&dv, &dt).context(error::ArrowComputeSnafu)?;
let rate = numeric::div(&dv, &dt).context(error::ArrowComputeSnafu)?;
let v = Helper::try_into_vector(&rate).context(error::FromArrowArraySnafu)?;
Ok(v)
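A self-contained sketch of the datum-based arrow kernels this rewrite switches to (`numeric::sub` and `numeric::div` replacing `arithmetic::subtract_dyn` and `arithmetic::divide_dyn`); the array values are made up for illustration.

use arrow::array::{AsArray, Float64Array};
use arrow::compute::kernels::numeric;
use arrow::datatypes::Float64Type;

fn main() {
    let v0 = Float64Array::from(vec![1.0, 3.0, 6.0]);
    let v1 = Float64Array::from(vec![3.0, 6.0, 10.0]);
    let dt = Float64Array::from(vec![1.0, 2.0, 4.0]);

    // Element-wise difference, replacing `arithmetic::subtract_dyn`.
    let dv = numeric::sub(&v1, &v0).expect("arrays have the same length");
    // Element-wise division, replacing `arithmetic::divide_dyn`.
    let rate = numeric::div(&dv, &dt).expect("arrays have the same length");

    let rate = rate.as_primitive::<Float64Type>();
    assert_eq!(rate.value(0), 2.0);
    assert_eq!(rate.value(2), 1.0);
}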

View File

@@ -18,9 +18,9 @@ use common_query::error::{
self, ArrowComputeSnafu, InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu,
};
use common_query::prelude::{Signature, Volatility};
use datafusion::arrow::compute::kernels::cmp::gt;
use datatypes::arrow::array::AsArray;
use datatypes::arrow::compute::cast;
use datatypes::arrow::compute::kernels::comparison::gt_dyn;
use datatypes::arrow::compute::kernels::zip;
use datatypes::arrow::datatypes::{DataType as ArrowDataType, Date32Type};
use datatypes::prelude::ConcreteDataType;
@@ -72,7 +72,7 @@ impl Function for GreatestFunction {
let column2 = cast(&columns[1].to_arrow_array(), &ArrowDataType::Date32)
.context(ArrowComputeSnafu)?;
let column2 = column2.as_primitive::<Date32Type>();
let boolean_array = gt_dyn(&column1, &column2).context(ArrowComputeSnafu)?;
let boolean_array = gt(&column1, &column2).context(ArrowComputeSnafu)?;
let result =
zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?;
Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?)
@@ -82,7 +82,7 @@ impl Function for GreatestFunction {
let column1 = column1.as_primitive::<Date32Type>();
let column2 = columns[1].to_arrow_array();
let column2 = column2.as_primitive::<Date32Type>();
let boolean_array = gt_dyn(&column1, &column2).context(ArrowComputeSnafu)?;
let boolean_array = gt(&column1, &column2).context(ArrowComputeSnafu)?;
let result =
zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?;
Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?)
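Similarly, a short sketch of the `cmp::gt` plus `zip` combination used above (replacing `gt_dyn`), on made-up Date32 data.

use arrow::array::{AsArray, Date32Array};
use arrow::compute::kernels::{cmp, zip};
use arrow::datatypes::Date32Type;

fn main() {
    let a = Date32Array::from(vec![10, 20, 30]);
    let b = Date32Array::from(vec![15, 5, 30]);

    // Boolean mask: true where `a` is greater.
    let mask = cmp::gt(&a, &b).expect("comparable arrays");
    // Pick from `a` where the mask is true, otherwise from `b`.
    let greatest = zip::zip(&mask, &a, &b).expect("arrays have the same length");

    let greatest = greatest.as_primitive::<Date32Type>();
    assert_eq!(greatest.value(0), 15);
    assert_eq!(greatest.value(1), 20);
}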

View File

@@ -18,36 +18,50 @@ use std::sync::Arc;
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::{Signature, Volatility};
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use common_time::{Date, DateTime, Timestamp};
use datatypes::prelude::ConcreteDataType;
use datatypes::types::TimestampType;
use datatypes::vectors::{
Int64Vector, StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
TimestampNanosecondVector, TimestampSecondVector, Vector, VectorRef,
};
use datatypes::vectors::{Int64Vector, VectorRef};
use snafu::ensure;
use crate::scalars::function::{Function, FunctionContext};
/// A function to convert the column into the unix timestamp in seconds.
#[derive(Clone, Debug, Default)]
pub struct ToUnixtimeFunction;
const NAME: &str = "to_unixtime";
fn convert_to_seconds(arg: &str) -> Option<i64> {
match Timestamp::from_str(arg) {
Ok(ts) => {
let sec_mul = (TimeUnit::Second.factor() / ts.unit().factor()) as i64;
Some(ts.value().div_euclid(sec_mul))
}
Err(_err) => None,
if let Ok(dt) = DateTime::from_str(arg) {
return Some(dt.val() / 1000);
}
if let Ok(ts) = Timestamp::from_str(arg) {
return Some(ts.split().0);
}
if let Ok(date) = Date::from_str(arg) {
return Some(date.to_secs());
}
None
}
fn process_vector(vector: &dyn Vector) -> Vec<Option<i64>> {
fn convert_timestamps_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
(0..vector.len())
.map(|i| paste::expr!((vector.get(i)).as_timestamp().map(|ts| ts.value())))
.map(|i| vector.get(i).as_timestamp().map(|ts| ts.split().0))
.collect::<Vec<Option<i64>>>()
}
fn convert_dates_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
(0..vector.len())
.map(|i| vector.get(i).as_date().map(|dt| dt.to_secs()))
.collect::<Vec<Option<i64>>>()
}
fn convert_datetimes_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
(0..vector.len())
.map(|i| vector.get(i).as_datetime().map(|dt| dt.val() / 1000))
.collect::<Vec<Option<i64>>>()
}
@@ -67,6 +81,8 @@ impl Function for ToUnixtimeFunction {
ConcreteDataType::string_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
@@ -87,51 +103,29 @@ impl Function for ToUnixtimeFunction {
}
);
let vector = &columns[0];
match columns[0].data_type() {
ConcreteDataType::String(_) => {
let array = columns[0].to_arrow_array();
let vector = StringVector::try_from_arrow_array(&array).unwrap();
Ok(Arc::new(Int64Vector::from(
(0..vector.len())
.map(|i| convert_to_seconds(&vector.get(i).to_string()))
.collect::<Vec<_>>(),
)))
}
ConcreteDataType::String(_) => Ok(Arc::new(Int64Vector::from(
(0..vector.len())
.map(|i| convert_to_seconds(&vector.get(i).to_string()))
.collect::<Vec<_>>(),
))),
ConcreteDataType::Int64(_) | ConcreteDataType::Int32(_) => {
let array = columns[0].to_arrow_array();
Ok(Arc::new(Int64Vector::try_from_arrow_array(&array).unwrap()))
// Safety: the cast always succeeds here
Ok(vector.cast(&ConcreteDataType::int64_datatype()).unwrap())
}
ConcreteDataType::Timestamp(ts) => {
let array = columns[0].to_arrow_array();
let value = match ts {
TimestampType::Second(_) => {
let vector = paste::expr!(TimestampSecondVector::try_from_arrow_array(
array
)
.unwrap());
process_vector(&vector)
}
TimestampType::Millisecond(_) => {
let vector = paste::expr!(
TimestampMillisecondVector::try_from_arrow_array(array).unwrap()
);
process_vector(&vector)
}
TimestampType::Microsecond(_) => {
let vector = paste::expr!(
TimestampMicrosecondVector::try_from_arrow_array(array).unwrap()
);
process_vector(&vector)
}
TimestampType::Nanosecond(_) => {
let vector = paste::expr!(TimestampNanosecondVector::try_from_arrow_array(
array
)
.unwrap());
process_vector(&vector)
}
};
Ok(Arc::new(Int64Vector::from(value)))
ConcreteDataType::Date(_) => {
let seconds = convert_dates_to_seconds(vector);
Ok(Arc::new(Int64Vector::from(seconds)))
}
ConcreteDataType::DateTime(_) => {
let seconds = convert_datetimes_to_seconds(vector);
Ok(Arc::new(Int64Vector::from(seconds)))
}
ConcreteDataType::Timestamp(_) => {
let seconds = convert_timestamps_to_seconds(vector);
Ok(Arc::new(Int64Vector::from(seconds)))
}
_ => UnsupportedInputDataTypeSnafu {
function: NAME,
@@ -151,11 +145,11 @@ impl fmt::Display for ToUnixtimeFunction {
#[cfg(test)]
mod tests {
use common_query::prelude::TypeSignature;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder};
use datatypes::scalars::ScalarVector;
use datatypes::timestamp::TimestampSecond;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
use datatypes::vectors::{StringVector, TimestampSecondVector};
use datatypes::vectors::{
DateTimeVector, DateVector, StringVector, TimestampMillisecondVector, TimestampSecondVector,
};
use super::{ToUnixtimeFunction, *};
use crate::scalars::Function;
@@ -170,18 +164,20 @@ mod tests {
);
assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::Uniform(1, valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_nanosecond_datatype(),
]
Signature {
type_signature: TypeSignature::Uniform(1, valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_nanosecond_datatype(),
]
));
let times = vec![
@@ -212,26 +208,6 @@ mod tests {
#[test]
fn test_int_to_unixtime() {
let f = ToUnixtimeFunction;
assert_eq!("to_unixtime", f.name());
assert_eq!(
ConcreteDataType::int64_datatype(),
f.return_type(&[]).unwrap()
);
assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::Uniform(1, valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_nanosecond_datatype(),
]
));
let times = vec![Some(3_i64), None, Some(5_i64), None];
let results = [Some(3), None, Some(5), None];
@@ -254,38 +230,13 @@ mod tests {
}
#[test]
fn test_timestamp_to_unixtime() {
fn test_date_to_unixtime() {
let f = ToUnixtimeFunction;
assert_eq!("to_unixtime", f.name());
assert_eq!(
ConcreteDataType::int64_datatype(),
f.return_type(&[]).unwrap()
);
assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::Uniform(1, valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_nanosecond_datatype(),
]
));
let times: Vec<Option<TimestampSecond>> = vec![
Some(TimestampSecond::new(123)),
None,
Some(TimestampSecond::new(42)),
None,
];
let results = [Some(123), None, Some(42), None];
let ts_vector: TimestampSecondVector = build_vector_from_slice(&times);
let args: Vec<VectorRef> = vec![Arc::new(ts_vector)];
let times = vec![Some(123), None, Some(42), None];
let results = [Some(10627200), None, Some(3628800), None];
let date_vector = DateVector::from(times.clone());
let args: Vec<VectorRef> = vec![Arc::new(date_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in times.iter().enumerate() {
@@ -303,11 +254,73 @@ mod tests {
}
}
fn build_vector_from_slice<T: ScalarVector>(items: &[Option<T::RefItem<'_>>]) -> T {
let mut builder = T::Builder::with_capacity(items.len());
for item in items {
builder.push(*item);
#[test]
fn test_datetime_to_unixtime() {
let f = ToUnixtimeFunction;
let times = vec![Some(123000), None, Some(42000), None];
let results = [Some(123), None, Some(42), None];
let date_vector = DateTimeVector::from(times.clone());
let args: Vec<VectorRef> = vec![Arc::new(date_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in times.iter().enumerate() {
let v = vector.get(i);
if i == 1 || i == 3 {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::Int64(ts) => {
assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
}
_ => unreachable!(),
}
}
}
#[test]
fn test_timestamp_to_unixtime() {
let f = ToUnixtimeFunction;
let times = vec![Some(123), None, Some(42), None];
let results = [Some(123), None, Some(42), None];
let ts_vector = TimestampSecondVector::from(times.clone());
let args: Vec<VectorRef> = vec![Arc::new(ts_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in times.iter().enumerate() {
let v = vector.get(i);
if i == 1 || i == 3 {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::Int64(ts) => {
assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
}
_ => unreachable!(),
}
}
let times = vec![Some(123000), None, Some(42000), None];
let results = [Some(123), None, Some(42), None];
let ts_vector = TimestampMillisecondVector::from(times.clone());
let args: Vec<VectorRef> = vec![Arc::new(ts_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in times.iter().enumerate() {
let v = vector.get(i);
if i == 1 || i == 3 {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::Int64(ts) => {
assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
}
_ => unreachable!(),
}
}
builder.finish()
}
}
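The unit math behind these conversions, written out as a tiny sketch; the constants mirror the expected values in the tests above, and the helper names are illustrative only.

// A Date value counts days since the Unix epoch; DateTime values and
// millisecond timestamps carry milliseconds. Both reduce to seconds.
fn date_days_to_secs(days: i64) -> i64 {
    days * 86_400
}

fn millis_to_secs(ms: i64) -> i64 {
    ms.div_euclid(1000)
}

fn main() {
    assert_eq!(date_days_to_secs(123), 10_627_200); // matches test_date_to_unixtime
    assert_eq!(millis_to_secs(123_000), 123);       // matches test_datetime_to_unixtime
}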

View File

@@ -210,6 +210,7 @@ pub struct GreptimeDBTelemetry {
working_home: Option<String>,
telemetry_url: &'static str,
should_report: Arc<AtomicBool>,
report_times: usize,
}
#[async_trait::async_trait]
@@ -242,6 +243,7 @@ impl GreptimeDBTelemetry {
client: client.ok(),
telemetry_url: TELEMETRY_URL,
should_report,
report_times: 0,
}
}
@@ -259,8 +261,11 @@ impl GreptimeDBTelemetry {
};
if let Some(client) = self.client.as_ref() {
info!("reporting greptimedb version: {:?}", data);
if self.report_times == 0 {
info!("reporting greptimedb version: {:?}", data);
}
let result = client.post(self.telemetry_url).json(&data).send().await;
self.report_times += 1;
debug!("report version result: {:?}", result);
result.ok()
} else {

View File

@@ -163,7 +163,14 @@ impl ChannelManager {
}
fn build_endpoint(&self, addr: &str) -> Result<Endpoint> {
let mut endpoint = Endpoint::new(format!("http://{addr}")).context(CreateChannelSnafu)?;
let http_prefix = if self.client_tls_config.is_some() {
"https"
} else {
"http"
};
let mut endpoint =
Endpoint::new(format!("{http_prefix}://{addr}")).context(CreateChannelSnafu)?;
if let Some(dur) = self.config.timeout {
endpoint = endpoint.timeout(dur);

View File

@@ -241,7 +241,7 @@ mod test {
.unwrap();
let flight_data = batches_to_flight_data(
arrow_schema,
&arrow_schema,
vec![
batch1.clone().into_df_record_batch(),
batch2.clone().into_df_record_batch(),

View File

@@ -10,6 +10,7 @@ testing = []
[dependencies]
api = { workspace = true }
arrow-flight.workspace = true
async-recursion = "1.0"
async-stream.workspace = true
async-trait.workspace = true
base64 = "0.21"
@@ -28,7 +29,7 @@ etcd-client.workspace = true
futures.workspace = true
humantime-serde.workspace = true
lazy_static.workspace = true
metrics.workspace = true
prometheus.workspace = true
prost.workspace = true
regex.workspace = true
serde.workspace = true
@@ -38,6 +39,7 @@ store-api = { workspace = true }
strum.workspace = true
table = { workspace = true }
tokio.workspace = true
tonic.workspace = true
[dev-dependencies]
chrono.workspace = true

View File

@@ -374,10 +374,11 @@ impl Procedure for AlterTableProcedure {
let state = &self.data.state;
let _timer = common_telemetry::timer!(
metrics::METRIC_META_PROCEDURE_ALTER_TABLE,
&[("step", state.as_ref().to_string())]
);
let step = state.as_ref();
let _timer = metrics::METRIC_META_PROCEDURE_ALTER_TABLE
.with_label_values(&[step])
.start_timer();
match state {
AlterTableState::Prepare => self.on_prepare().await,

View File

@@ -251,10 +251,9 @@ impl Procedure for CreateTableProcedure {
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
let state = &self.creator.data.state;
let _timer = common_telemetry::timer!(
metrics::METRIC_META_PROCEDURE_CREATE_TABLE,
&[("step", state.as_ref().to_string())]
);
let _timer = metrics::METRIC_META_PROCEDURE_CREATE_TABLE
.with_label_values(&[state.as_ref()])
.start_timer();
match state {
CreateTableState::Prepare => self.on_prepare().await,

View File

@@ -197,10 +197,9 @@ impl Procedure for DropTableProcedure {
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
let state = &self.data.state;
let _timer = common_telemetry::timer!(
metrics::METRIC_META_PROCEDURE_DROP_TABLE,
&[("step", state.as_ref().to_string())]
);
let _timer = metrics::METRIC_META_PROCEDURE_DROP_TABLE
.with_label_values(&[state.as_ref()])
.start_timer();
match self.data.state {
DropTableState::Prepare => self.on_prepare().await,

View File

@@ -55,10 +55,9 @@ impl Procedure for TruncateTableProcedure {
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
let state = &self.data.state;
let _timer = common_telemetry::timer!(
metrics::METRIC_META_PROCEDURE_TRUNCATE_TABLE,
&[("step", state.as_ref().to_string())]
);
let _timer = metrics::METRIC_META_PROCEDURE_TRUNCATE_TABLE
.with_label_values(&[state.as_ref()])
.start_timer();
match self.data.state {
TruncateTableState::Prepare => self.on_prepare().await,
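Across these procedure files the `common_telemetry::timer!` macro gives way to prometheus histogram timers; a generic sketch of that pattern follows, with a metric name and label that are illustrative rather than the project's actual definitions.

use lazy_static::lazy_static;
use prometheus::{register_histogram_vec, HistogramVec};

lazy_static! {
    static ref PROCEDURE_DURATION: HistogramVec = register_histogram_vec!(
        "demo_procedure_duration_seconds",
        "Time spent in each procedure step",
        &["step"]
    )
    .unwrap();
}

fn run_step(step: &str) {
    // The timer observes the elapsed time into the histogram when dropped.
    let _timer = PROCEDURE_DURATION.with_label_values(&[step]).start_timer();
    // ... the actual step work goes here ...
}

fn main() {
    run_step("prepare");
}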

View File

@@ -28,6 +28,26 @@ use crate::peer::Peer;
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display("Empty key is not allowed"))]
EmptyKey { location: Location },
#[snafu(display("Invalid result with a txn response: {}", err_msg))]
InvalidTxnResult { err_msg: String, location: Location },
#[snafu(display("Failed to connect to Etcd"))]
ConnectEtcd {
#[snafu(source)]
error: etcd_client::Error,
location: Location,
},
#[snafu(display("Failed to execute via Etcd"))]
EtcdFailed {
#[snafu(source)]
error: etcd_client::Error,
location: Location,
},
#[snafu(display("Failed to get sequence: {}", err_msg))]
NextSequence { err_msg: String, location: Location },
@@ -254,7 +274,10 @@ impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
use Error::*;
match self {
IllegalServerState { .. } | EtcdTxnOpResponse { .. } => StatusCode::Internal,
IllegalServerState { .. }
| EtcdTxnOpResponse { .. }
| EtcdFailed { .. }
| ConnectEtcd { .. } => StatusCode::Internal,
SerdeJson { .. }
| ParseOption { .. }
@@ -267,7 +290,8 @@ impl ErrorExt for Error {
| NextSequence { .. }
| SequenceOutOfRange { .. }
| UnexpectedSequenceValue { .. }
| InvalidHeartbeatResponse { .. } => StatusCode::Unexpected,
| InvalidHeartbeatResponse { .. }
| InvalidTxnResult { .. } => StatusCode::Unexpected,
SendMessage { .. }
| GetKvCache { .. }
@@ -277,7 +301,7 @@ impl ErrorExt for Error {
| RenameTable { .. }
| Unsupported { .. } => StatusCode::Internal,
PrimaryKeyNotFound { .. } => StatusCode::InvalidArguments,
PrimaryKeyNotFound { .. } | &EmptyKey { .. } => StatusCode::InvalidArguments,
TableNotFound { .. } => StatusCode::TableNotFound,
TableAlreadyExists { .. } => StatusCode::TableAlreadyExists,
@@ -319,4 +343,16 @@ impl Error {
pub fn is_retry_later(&self) -> bool {
matches!(self, Error::RetryLater { .. })
}
/// Returns true if the response exceeds the size limit.
pub fn is_exceeded_size_limit(&self) -> bool {
if let Error::EtcdFailed {
error: etcd_client::Error::GRpcStatus(status),
..
} = self
{
return status.code() == tonic::Code::OutOfRange;
}
false
}
}
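A minimal check of the mapping this helper relies on: etcd reports an oversized response as a gRPC OutOfRange status. The helper name and message string below are made up; only the `tonic::Code` check mirrors the real code.

use tonic::{Code, Status};

fn response_exceeds_size_limit(status: &Status) -> bool {
    // etcd surfaces "response too large" as gRPC OutOfRange.
    status.code() == Code::OutOfRange
}

fn main() {
    let status = Status::new(Code::OutOfRange, "etcd response exceeds size limit");
    assert!(response_exceeds_size_limit(&status));
}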

View File

@@ -54,6 +54,8 @@ pub mod table_region;
// TODO(weny): removes it.
#[allow(deprecated)]
pub mod table_route;
#[cfg(any(test, feature = "testing"))]
pub mod test_utils;
use std::collections::{BTreeMap, HashMap};
use std::fmt::Debug;
@@ -89,12 +91,20 @@ pub const REMOVED_PREFIX: &str = "__removed";
const NAME_PATTERN: &str = r"[a-zA-Z_:-][a-zA-Z0-9_:\-\.]*";
const DATANODE_TABLE_KEY_PREFIX: &str = "__dn_table";
const TABLE_INFO_KEY_PREFIX: &str = "__table_info";
const TABLE_NAME_KEY_PREFIX: &str = "__table_name";
const TABLE_REGION_KEY_PREFIX: &str = "__table_region";
const CATALOG_NAME_KEY_PREFIX: &str = "__catalog_name";
const SCHEMA_NAME_KEY_PREFIX: &str = "__schema_name";
const TABLE_ROUTE_PREFIX: &str = "__table_route";
pub const TABLE_INFO_KEY_PREFIX: &str = "__table_info";
pub const TABLE_NAME_KEY_PREFIX: &str = "__table_name";
pub const CATALOG_NAME_KEY_PREFIX: &str = "__catalog_name";
pub const SCHEMA_NAME_KEY_PREFIX: &str = "__schema_name";
pub const TABLE_ROUTE_PREFIX: &str = "__table_route";
pub const CACHE_KEY_PREFIXES: [&str; 4] = [
TABLE_NAME_KEY_PREFIX,
CATALOG_NAME_KEY_PREFIX,
SCHEMA_NAME_KEY_PREFIX,
TABLE_ROUTE_PREFIX,
];
pub type RegionDistribution = BTreeMap<DatanodeId, Vec<RegionNumber>>;
@@ -684,12 +694,11 @@ mod tests {
use std::sync::Arc;
use bytes::Bytes;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SchemaBuilder};
use futures::TryStreamExt;
use table::metadata::{RawTableInfo, TableInfo, TableInfoBuilder, TableMetaBuilder};
use table::metadata::{RawTableInfo, TableInfo};
use super::datanode_table::DatanodeTableKey;
use super::test_utils;
use crate::ddl::utils::region_storage_path;
use crate::key::datanode_table::RegionInfo;
use crate::key::table_info::TableInfoValue;
@@ -735,40 +744,6 @@ mod tests {
assert_eq!(removed, to_removed_key(key));
}
fn new_test_table_info(region_numbers: impl Iterator<Item = u32>) -> TableInfo {
let column_schemas = vec![
ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
false,
)
.with_time_index(true),
ColumnSchema::new("col2", ConcreteDataType::int32_datatype(), true),
];
let schema = SchemaBuilder::try_from(column_schemas)
.unwrap()
.version(123)
.build()
.unwrap();
let meta = TableMetaBuilder::default()
.schema(Arc::new(schema))
.primary_key_indices(vec![0])
.engine("engine")
.next_column_id(3)
.region_numbers(region_numbers.collect::<Vec<_>>())
.build()
.unwrap();
TableInfoBuilder::default()
.table_id(10)
.table_version(5)
.name("mytable")
.meta(meta)
.build()
.unwrap()
}
fn new_test_region_route() -> RegionRoute {
new_region_route(1, 2)
}
@@ -783,9 +758,14 @@ mod tests {
},
leader_peer: Some(Peer::new(datanode, "a2")),
follower_peers: vec![],
leader_status: None,
}
}
fn new_test_table_info(region_numbers: impl Iterator<Item = u32>) -> TableInfo {
test_utils::new_test_table_info(10, region_numbers)
}
#[tokio::test]
async fn test_create_table_metadata() {
let mem_kv = Arc::new(MemoryKvBackend::default());

View File

@@ -16,10 +16,8 @@ use std::fmt::Display;
use std::sync::Arc;
use common_catalog::consts::DEFAULT_CATALOG_NAME;
use common_telemetry::timer;
use futures::stream::BoxStream;
use futures::StreamExt;
use metrics::increment_counter;
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
@@ -104,7 +102,7 @@ impl CatalogManager {
/// Creates `CatalogNameKey`.
pub async fn create(&self, catalog: CatalogNameKey<'_>, if_not_exists: bool) -> Result<()> {
let _timer = timer!(crate::metrics::METRIC_META_CREATE_CATALOG);
let _timer = crate::metrics::METRIC_META_CREATE_CATALOG.start_timer();
let raw_key = catalog.as_raw_key();
let raw_value = CatalogNameValue.try_as_raw_value()?;
@@ -113,7 +111,7 @@ impl CatalogManager {
.put_conditionally(raw_key, raw_value, if_not_exists)
.await?
{
increment_counter!(crate::metrics::METRIC_META_CREATE_CATALOG);
crate::metrics::METRIC_META_CREATE_CATALOG_COUNTER.inc();
}
Ok(())

View File

@@ -18,11 +18,9 @@ use std::sync::Arc;
use std::time::Duration;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_telemetry::timer;
use futures::stream::BoxStream;
use futures::StreamExt;
use humantime_serde::re::humantime;
use metrics::increment_counter;
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
@@ -146,7 +144,7 @@ impl SchemaManager {
value: Option<SchemaNameValue>,
if_not_exists: bool,
) -> Result<()> {
let _timer = timer!(crate::metrics::METRIC_META_CREATE_SCHEMA);
let _timer = crate::metrics::METRIC_META_CREATE_SCHEMA.start_timer();
let raw_key = schema.as_raw_key();
let raw_value = value.unwrap_or_default().try_as_raw_value()?;
@@ -155,7 +153,7 @@ impl SchemaManager {
.put_conditionally(raw_key, raw_value, if_not_exists)
.await?
{
increment_counter!(crate::metrics::METRIC_META_CREATE_SCHEMA);
crate::metrics::METRIC_META_CREATE_SCHEMA_COUNTER.inc();
}
Ok(())

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use table::engine::TableReference;
use table::metadata::{RawTableInfo, TableId};
@@ -21,6 +23,7 @@ use crate::error::Result;
use crate::key::{to_removed_key, TableMetaKey};
use crate::kv_backend::txn::{Compare, CompareOp, Txn, TxnOp, TxnOpResponse};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::BatchGetRequest;
use crate::table_name::TableName;
pub struct TableInfoKey {
@@ -233,6 +236,37 @@ impl TableInfoManager {
.map(|x| DeserializedValueWithBytes::from_inner_slice(&x.value))
.transpose()
}
pub async fn batch_get(
&self,
table_ids: &[TableId],
) -> Result<HashMap<TableId, TableInfoValue>> {
let lookup_table = table_ids
.iter()
.map(|id| (TableInfoKey::new(*id).as_raw_key(), id))
.collect::<HashMap<_, _>>();
let resp = self
.kv_backend
.batch_get(BatchGetRequest {
keys: lookup_table.keys().cloned().collect::<Vec<_>>(),
})
.await?;
let values = resp
.kvs
.iter()
.map(|kv| {
Ok((
// Safety: must exist.
**lookup_table.get(kv.key()).unwrap(),
TableInfoValue::try_from_raw_value(&kv.value)?,
))
})
.collect::<Result<HashMap<_, _>>>()?;
Ok(values)
}
}
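The reverse-lookup idiom used by both new `batch_get` implementations, reduced to plain std types; the key format and table ids below are made up for illustration.

use std::collections::HashMap;

fn main() {
    let table_ids = [1024u32, 1025, 1026];

    // Map each encoded key back to the id it was built from.
    let lookup_table: HashMap<Vec<u8>, u32> = table_ids
        .iter()
        .map(|id| (format!("__table_info/{id}").into_bytes(), *id))
        .collect();

    // Pretend these are the kvs returned by one batched round trip;
    // the response may be a subset of the requested keys.
    let returned_keys = vec![b"__table_info/1024".to_vec(), b"__table_info/1026".to_vec()];
    let hit_ids: Vec<u32> = returned_keys
        .iter()
        .map(|key| *lookup_table.get(key).expect("key was requested"))
        .collect();
    assert_eq!(hit_ids, vec![1024, 1026]);
}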
#[cfg(test)]

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::fmt::Display;
use serde::{Deserialize, Serialize};
@@ -23,6 +24,7 @@ use crate::key::{to_removed_key, RegionDistribution, TableMetaKey, TABLE_ROUTE_P
use crate::kv_backend::txn::{Compare, CompareOp, Txn, TxnOp, TxnOpResponse};
use crate::kv_backend::KvBackendRef;
use crate::rpc::router::{region_distribution, RegionRoute};
use crate::rpc::store::BatchGetRequest;
pub struct TableRouteKey {
pub table_id: TableId,
@@ -197,6 +199,38 @@ impl TableRouteManager {
.transpose()
}
/// It may return a subset of the `table_ids`.
pub async fn batch_get(
&self,
table_ids: &[TableId],
) -> Result<HashMap<TableId, TableRouteValue>> {
let lookup_table = table_ids
.iter()
.map(|id| (TableRouteKey::new(*id).as_raw_key(), id))
.collect::<HashMap<_, _>>();
let resp = self
.kv_backend
.batch_get(BatchGetRequest {
keys: lookup_table.keys().cloned().collect::<Vec<_>>(),
})
.await?;
let values = resp
.kvs
.iter()
.map(|kv| {
Ok((
// Safety: must exist.
**lookup_table.get(kv.key()).unwrap(),
TableRouteValue::try_from_raw_value(&kv.value)?,
))
})
.collect::<Result<HashMap<_, _>>>()?;
Ok(values)
}
#[cfg(test)]
pub async fn get_removed(
&self,

View File

@@ -0,0 +1,57 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SchemaBuilder};
use store_api::storage::TableId;
use table::metadata::{TableInfo, TableInfoBuilder, TableMetaBuilder};
pub fn new_test_table_info<I: IntoIterator<Item = u32>>(
table_id: TableId,
region_numbers: I,
) -> TableInfo {
let column_schemas = vec![
ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
false,
)
.with_time_index(true),
ColumnSchema::new("col2", ConcreteDataType::int32_datatype(), true),
];
let schema = SchemaBuilder::try_from(column_schemas)
.unwrap()
.version(123)
.build()
.unwrap();
let meta = TableMetaBuilder::default()
.schema(Arc::new(schema))
.primary_key_indices(vec![0])
.engine("engine")
.next_column_id(3)
.region_numbers(region_numbers.into_iter().collect::<Vec<_>>())
.build()
.unwrap();
TableInfoBuilder::default()
.table_id(table_id)
.table_version(5)
.name("mytable")
.meta(meta)
.build()
.unwrap()
}

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod etcd;
pub mod memory;
pub mod test;
pub mod txn;
@@ -27,8 +28,7 @@ use crate::error::Error;
use crate::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
DeleteRangeResponse, MoveValueRequest, MoveValueResponse, PutRequest, PutResponse,
RangeRequest, RangeResponse,
DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
};
use crate::rpc::KeyValue;
@@ -66,9 +66,6 @@ where
req: BatchDeleteRequest,
) -> Result<BatchDeleteResponse, Self::Error>;
/// MoveValue atomically renames the key to the given updated key.
async fn move_value(&self, req: MoveValueRequest) -> Result<MoveValueResponse, Self::Error>;
// The following methods are implemented based on the above methods,
// and a higher-level interface is provided to simplify usage.
@@ -131,3 +128,12 @@ where
}
}
}
pub trait ResettableKvBackend: KvBackend
where
Self::Error: ErrorExt,
{
fn reset(&self);
}
pub type ResettableKvBackendRef = Arc<dyn ResettableKvBackend<Error = Error> + Send + Sync>;

View File

@@ -15,27 +15,47 @@
use std::any::Any;
use std::sync::Arc;
use common_meta::kv_backend::txn::{Txn as KvTxn, TxnResponse as KvTxnResponse};
use common_meta::kv_backend::{KvBackend, TxnService};
use common_meta::metrics::METRIC_META_TXN_REQUEST;
use common_meta::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
DeleteRangeResponse, MoveValueRequest, MoveValueResponse, PutRequest, PutResponse,
RangeRequest, RangeResponse,
};
use common_meta::rpc::KeyValue;
use common_telemetry::{timer, warn};
use etcd_client::{
Client, Compare, CompareOp, DeleteOptions, GetOptions, PutOptions, Txn, TxnOp, TxnOpResponse,
TxnResponse,
};
use snafu::{ensure, OptionExt, ResultExt};
use crate::error;
use crate::error::{ConvertEtcdTxnObjectSnafu, Error, Result};
use crate::service::store::etcd_util::KvPair;
use crate::service::store::kv::KvStoreRef;
use super::KvBackendRef;
use crate::error::{self, Error, Result};
use crate::kv_backend::txn::{Txn as KvTxn, TxnResponse as KvTxnResponse};
use crate::kv_backend::{KvBackend, TxnService};
use crate::metrics::METRIC_META_TXN_REQUEST;
use crate::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
};
use crate::rpc::KeyValue;
pub struct KvPair<'a>(&'a etcd_client::KeyValue);
impl<'a> KvPair<'a> {
/// Creates a `KvPair` from etcd KeyValue
#[inline]
pub fn new(kv: &'a etcd_client::KeyValue) -> Self {
Self(kv)
}
#[inline]
pub fn from_etcd_kv(kv: &etcd_client::KeyValue) -> KeyValue {
KeyValue::from(KvPair::new(kv))
}
}
impl<'a> From<KvPair<'a>> for KeyValue {
fn from(kv: KvPair<'a>) -> Self {
Self {
key: kv.0.key().to_vec(),
value: kv.0.value().to_vec(),
}
}
}
// Maximum number of operations permitted in a transaction.
// The etcd default configuration's `--max-txn-ops` is 128.
@@ -48,7 +68,7 @@ pub struct EtcdStore {
}
impl EtcdStore {
pub async fn with_endpoints<E, S>(endpoints: S) -> Result<KvStoreRef>
pub async fn with_endpoints<E, S>(endpoints: S) -> Result<KvBackendRef>
where
E: AsRef<str>,
S: AsRef<[E]>,
@@ -60,7 +80,7 @@ impl EtcdStore {
Ok(Self::with_etcd_client(client))
}
pub fn with_etcd_client(client: Client) -> KvStoreRef {
pub fn with_etcd_client(client: Client) -> KvBackendRef {
Arc::new(Self { client })
}
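// A hedged usage sketch (not part of the diff): it assumes it lives in the same
// module, so `EtcdStore`, `KvBackendRef` and `Result` resolve as imported above;
// the endpoint address is a placeholder.
async fn connect_example() -> Result<KvBackendRef> {
    let kv_backend: KvBackendRef = EtcdStore::with_endpoints(["127.0.0.1:2379"]).await?;
    Ok(kv_backend)
}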
@@ -289,79 +309,6 @@ impl KvBackend for EtcdStore {
Ok(BatchDeleteResponse { prev_kvs })
}
async fn move_value(&self, req: MoveValueRequest) -> Result<MoveValueResponse> {
let MoveValue {
from_key,
to_key,
delete_options,
} = req.try_into()?;
let mut client = self.client.kv_client();
// TODO(jiachun): Maybe it's better to let the users control it in the request
const MAX_RETRIES: usize = 8;
for _ in 0..MAX_RETRIES {
let from_key = from_key.as_slice();
let to_key = to_key.as_slice();
let res = client
.get(from_key, None)
.await
.context(error::EtcdFailedSnafu)?;
let txn = match res.kvs().first() {
None => {
// get `to_key` if `from_key` absent
// revision 0 means the key does not exist
let compare = Compare::create_revision(from_key, CompareOp::Equal, 0);
let get = TxnOp::get(to_key, None);
Txn::new().when(vec![compare]).and_then(vec![get])
}
Some(kv) => {
// compare `from_key` and move to `to_key`
let value = kv.value();
let compare = Compare::value(from_key, CompareOp::Equal, value);
let delete = TxnOp::delete(from_key, delete_options.clone());
let put = TxnOp::put(to_key, value, None);
Txn::new().when(vec![compare]).and_then(vec![delete, put])
}
};
let txn_res = client.txn(txn).await.context(error::EtcdFailedSnafu)?;
if !txn_res.succeeded() {
warn!(
"Failed to atomically move {:?} to {:?}, try again...",
String::from_utf8_lossy(from_key),
String::from_utf8_lossy(to_key)
);
continue;
}
// [`get_res`] or [`delete_res`, `put_res`]; `put_res` will be ignored.
for op_res in txn_res.op_responses() {
match op_res {
TxnOpResponse::Get(res) => {
return Ok(MoveValueResponse(
res.kvs().first().map(KvPair::from_etcd_kv),
));
}
TxnOpResponse::Delete(res) => {
return Ok(MoveValueResponse(
res.prev_kvs().first().map(KvPair::from_etcd_kv),
));
}
_ => {}
}
}
}
error::MoveValueSnafu {
key: String::from_utf8_lossy(&from_key),
}
.fail()
}
}
#[async_trait::async_trait]
@@ -369,10 +316,9 @@ impl TxnService for EtcdStore {
type Error = Error;
async fn txn(&self, txn: KvTxn) -> Result<KvTxnResponse> {
let _timer = timer!(
METRIC_META_TXN_REQUEST,
&[("target", "etcd".to_string()), ("op", "txn".to_string()),]
);
let _timer = METRIC_META_TXN_REQUEST
.with_label_values(&["etcd", "txn"])
.start_timer();
let etcd_txn: Txn = txn.into();
let txn_res = self
@@ -381,7 +327,7 @@ impl TxnService for EtcdStore {
.txn(etcd_txn)
.await
.context(error::EtcdFailedSnafu)?;
txn_res.try_into().context(ConvertEtcdTxnObjectSnafu)
txn_res.try_into()
}
}
@@ -570,26 +516,6 @@ impl TryFrom<DeleteRangeRequest> for Delete {
}
}
struct MoveValue {
from_key: Vec<u8>,
to_key: Vec<u8>,
delete_options: Option<DeleteOptions>,
}
impl TryFrom<MoveValueRequest> for MoveValue {
type Error = Error;
fn try_from(req: MoveValueRequest) -> Result<Self> {
let MoveValueRequest { from_key, to_key } = req;
Ok(MoveValue {
from_key,
to_key,
delete_options: Some(DeleteOptions::default().with_prev_key()),
})
}
}
#[cfg(test)]
mod tests {
use super::*;
@@ -701,18 +627,4 @@ mod tests {
assert_eq!(b"test_key".to_vec(), delete.key);
let _ = delete.options.unwrap();
}
#[test]
fn test_parse_move_value() {
let req = MoveValueRequest {
from_key: b"test_from_key".to_vec(),
to_key: b"test_to_key".to_vec(),
};
let move_value: MoveValue = req.try_into().unwrap();
assert_eq!(b"test_from_key".to_vec(), move_value.from_key);
assert_eq!(b"test_to_key".to_vec(), move_value.to_key);
let _ = move_value.delete_options.unwrap();
}
}

View File

@@ -21,17 +21,16 @@ use std::sync::RwLock;
use async_trait::async_trait;
use common_error::ext::ErrorExt;
use common_telemetry::timer;
use serde::Serializer;
use super::ResettableKvBackend;
use crate::kv_backend::txn::{Txn, TxnOp, TxnOpResponse, TxnRequest, TxnResponse};
use crate::kv_backend::{KvBackend, TxnService};
use crate::metrics::METRIC_META_TXN_REQUEST;
use crate::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
DeleteRangeResponse, MoveValueRequest, MoveValueResponse, PutRequest, PutResponse,
RangeRequest, RangeResponse,
DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
};
use crate::rpc::KeyValue;
@@ -263,27 +262,6 @@ impl<T: ErrorExt + Send + Sync + 'static> KvBackend for MemoryKvBackend<T> {
Ok(BatchDeleteResponse { prev_kvs })
}
async fn move_value(&self, req: MoveValueRequest) -> Result<MoveValueResponse, Self::Error> {
let MoveValueRequest { from_key, to_key } = req;
let mut kvs = self.kvs.write().unwrap();
let kv = if let Some(v) = kvs.remove(&from_key) {
kvs.insert(to_key, v.clone());
Some(KeyValue {
key: from_key,
value: v,
})
} else {
kvs.get(&to_key).map(|v| KeyValue {
key: to_key,
value: v.clone(),
})
};
Ok(MoveValueResponse(kv))
}
}
#[async_trait]
@@ -291,10 +269,9 @@ impl<T: ErrorExt + Send + Sync> TxnService for MemoryKvBackend<T> {
type Error = T;
async fn txn(&self, txn: Txn) -> Result<TxnResponse, Self::Error> {
let _timer = timer!(
METRIC_META_TXN_REQUEST,
&[("target", "memory"), ("op", "txn")]
);
let _timer = METRIC_META_TXN_REQUEST
.with_label_values(&["memory", "txn"])
.start_timer();
let TxnRequest {
compare,
@@ -348,6 +325,12 @@ impl<T: ErrorExt + Send + Sync> TxnService for MemoryKvBackend<T> {
}
}
impl<T: ErrorExt + Send + Sync + 'static> ResettableKvBackend for MemoryKvBackend<T> {
fn reset(&self) {
self.clear();
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
@@ -358,27 +341,26 @@ mod tests {
prepare_kv, test_kv_batch_delete, test_kv_batch_get, test_kv_compare_and_put,
test_kv_delete_range, test_kv_put, test_kv_range, test_kv_range_2,
};
use crate::kv_backend::KvBackend;
async fn mock_mem_store_with_data() -> MemoryKvBackend<Error> {
let kv_store = MemoryKvBackend::<Error>::new();
prepare_kv(&kv_store).await;
let kv_backend = MemoryKvBackend::<Error>::new();
prepare_kv(&kv_backend).await;
kv_store
kv_backend
}
#[tokio::test]
async fn test_put() {
let kv_store = mock_mem_store_with_data().await;
let kv_backend = mock_mem_store_with_data().await;
test_kv_put(kv_store).await;
test_kv_put(kv_backend).await;
}
#[tokio::test]
async fn test_range() {
let kv_store = mock_mem_store_with_data().await;
let kv_backend = mock_mem_store_with_data().await;
test_kv_range(kv_store).await;
test_kv_range(kv_backend).await;
}
#[tokio::test]
@@ -390,53 +372,29 @@ mod tests {
#[tokio::test]
async fn test_batch_get() {
let kv_store = mock_mem_store_with_data().await;
let kv_backend = mock_mem_store_with_data().await;
test_kv_batch_get(kv_store).await;
test_kv_batch_get(kv_backend).await;
}
#[tokio::test(flavor = "multi_thread")]
async fn test_compare_and_put() {
let kv_store = Arc::new(MemoryKvBackend::<Error>::new());
let kv_backend = Arc::new(MemoryKvBackend::<Error>::new());
test_kv_compare_and_put(kv_store).await;
test_kv_compare_and_put(kv_backend).await;
}
#[tokio::test]
async fn test_delete_range() {
let kv_store = mock_mem_store_with_data().await;
let kv_backend = mock_mem_store_with_data().await;
test_kv_delete_range(kv_store).await;
}
#[tokio::test]
async fn test_move_value() {
let kv_store = mock_mem_store_with_data().await;
let req = MoveValueRequest {
from_key: b"key1".to_vec(),
to_key: b"key111".to_vec(),
};
let resp = kv_store.move_value(req).await.unwrap();
assert_eq!(b"key1", resp.0.as_ref().unwrap().key());
assert_eq!(b"val1", resp.0.as_ref().unwrap().value());
let kv_store = mock_mem_store_with_data().await;
let req = MoveValueRequest {
from_key: b"notexistkey".to_vec(),
to_key: b"key222".to_vec(),
};
let resp = kv_store.move_value(req).await.unwrap();
assert!(resp.0.is_none());
test_kv_delete_range(kv_backend).await;
}
#[tokio::test]
async fn test_batch_delete() {
let kv_store = mock_mem_store_with_data().await;
let kv_backend = mock_mem_store_with_data().await;
test_kv_batch_delete(kv_store).await;
test_kv_batch_delete(kv_backend).await;
}
}

View File

@@ -38,9 +38,9 @@ pub fn mock_kvs() -> Vec<KeyValue> {
]
}
pub async fn prepare_kv(kv_store: &impl KvBackend) {
pub async fn prepare_kv(kv_backend: &impl KvBackend) {
let kvs = mock_kvs();
assert!(kv_store
assert!(kv_backend
.batch_put(BatchPutRequest {
kvs,
..Default::default()
@@ -48,7 +48,7 @@ pub async fn prepare_kv(kv_store: &impl KvBackend) {
.await
.is_ok());
assert!(kv_store
assert!(kv_backend
.put(PutRequest {
key: b"key11".to_vec(),
value: b"val11".to_vec(),
@@ -58,8 +58,8 @@ pub async fn prepare_kv(kv_store: &impl KvBackend) {
.is_ok());
}
pub async fn test_kv_put(kv_store: impl KvBackend) {
let resp = kv_store
pub async fn test_kv_put(kv_backend: impl KvBackend) {
let resp = kv_backend
.put(PutRequest {
key: b"key11".to_vec(),
value: b"val12".to_vec(),
@@ -69,7 +69,7 @@ pub async fn test_kv_put(kv_store: impl KvBackend) {
.unwrap();
assert!(resp.prev_kv.is_none());
let resp = kv_store
let resp = kv_backend
.put(PutRequest {
key: b"key11".to_vec(),
value: b"val13".to_vec(),
@@ -82,11 +82,11 @@ pub async fn test_kv_put(kv_store: impl KvBackend) {
assert_eq!(b"val12", prev_kv.value());
}
pub async fn test_kv_range(kv_store: impl KvBackend) {
pub async fn test_kv_range(kv_backend: impl KvBackend) {
let key = b"key1".to_vec();
let range_end = util::get_prefix_end_key(b"key1");
let resp = kv_store
let resp = kv_backend
.range(RangeRequest {
key: key.clone(),
range_end: range_end.clone(),
@@ -102,7 +102,7 @@ pub async fn test_kv_range(kv_store: impl KvBackend) {
assert_eq!(b"key11", resp.kvs[1].key());
assert_eq!(b"val11", resp.kvs[1].value());
let resp = kv_store
let resp = kv_backend
.range(RangeRequest {
key: key.clone(),
range_end: range_end.clone(),
@@ -118,7 +118,7 @@ pub async fn test_kv_range(kv_store: impl KvBackend) {
assert_eq!(b"key11", resp.kvs[1].key());
assert_eq!(b"", resp.kvs[1].value());
let resp = kv_store
let resp = kv_backend
.range(RangeRequest {
key: key.clone(),
limit: 0,
@@ -132,7 +132,7 @@ pub async fn test_kv_range(kv_store: impl KvBackend) {
assert_eq!(b"key1", resp.kvs[0].key());
assert_eq!(b"val1", resp.kvs[0].value());
let resp = kv_store
let resp = kv_backend
.range(RangeRequest {
key,
range_end,
@@ -147,19 +147,19 @@ pub async fn test_kv_range(kv_store: impl KvBackend) {
assert_eq!(b"val1", resp.kvs[0].value());
}
pub async fn test_kv_range_2(kv_store: impl KvBackend) {
kv_store
pub async fn test_kv_range_2(kv_backend: impl KvBackend) {
kv_backend
.put(PutRequest::new().with_key("atest").with_value("value"))
.await
.unwrap();
kv_store
kv_backend
.put(PutRequest::new().with_key("test").with_value("value"))
.await
.unwrap();
// If both key and range_end are \0, then range represents all keys.
let result = kv_store
let result = kv_backend
.range(RangeRequest::new().with_range(b"\0".to_vec(), b"\0".to_vec()))
.await
.unwrap();
@@ -168,14 +168,14 @@ pub async fn test_kv_range_2(kv_store: impl KvBackend) {
assert!(!result.more);
// If range_end is \0, the range is all keys greater than or equal to the key argument.
let result = kv_store
let result = kv_backend
.range(RangeRequest::new().with_range(b"a".to_vec(), b"\0".to_vec()))
.await
.unwrap();
assert_eq!(result.kvs.len(), 2);
let result = kv_store
let result = kv_backend
.range(RangeRequest::new().with_range(b"b".to_vec(), b"\0".to_vec()))
.await
.unwrap();
@@ -184,7 +184,7 @@ pub async fn test_kv_range_2(kv_store: impl KvBackend) {
assert_eq!(result.kvs[0].key, b"test");
// Fetches the keys >= "a", set limit to 1, the `more` should be true.
let result = kv_store
let result = kv_backend
.range(
RangeRequest::new()
.with_range(b"a".to_vec(), b"\0".to_vec())
@@ -196,7 +196,7 @@ pub async fn test_kv_range_2(kv_store: impl KvBackend) {
assert!(result.more);
// Fetches the keys >= "a", set limit to 2, the `more` should be false.
let result = kv_store
let result = kv_backend
.range(
RangeRequest::new()
.with_range(b"a".to_vec(), b"\0".to_vec())
@@ -208,7 +208,7 @@ pub async fn test_kv_range_2(kv_store: impl KvBackend) {
assert!(!result.more);
// Fetches the keys >= "a", set limit to 3, the `more` should be false.
let result = kv_store
let result = kv_backend
.range(
RangeRequest::new()
.with_range(b"a".to_vec(), b"\0".to_vec())
@@ -220,19 +220,28 @@ pub async fn test_kv_range_2(kv_store: impl KvBackend) {
assert!(!result.more);
}
pub async fn test_kv_batch_get(kv_store: impl KvBackend) {
pub async fn test_kv_batch_get(kv_backend: impl KvBackend) {
let keys = vec![];
let resp = kv_store.batch_get(BatchGetRequest { keys }).await.unwrap();
let resp = kv_backend
.batch_get(BatchGetRequest { keys })
.await
.unwrap();
assert!(resp.kvs.is_empty());
let keys = vec![b"key10".to_vec()];
let resp = kv_store.batch_get(BatchGetRequest { keys }).await.unwrap();
let resp = kv_backend
.batch_get(BatchGetRequest { keys })
.await
.unwrap();
assert!(resp.kvs.is_empty());
let keys = vec![b"key1".to_vec(), b"key3".to_vec(), b"key4".to_vec()];
let resp = kv_store.batch_get(BatchGetRequest { keys }).await.unwrap();
let resp = kv_backend
.batch_get(BatchGetRequest { keys })
.await
.unwrap();
assert_eq!(2, resp.kvs.len());
assert_eq!(b"key1", resp.kvs[0].key());
@@ -241,12 +250,12 @@ pub async fn test_kv_batch_get(kv_store: impl KvBackend) {
assert_eq!(b"val3", resp.kvs[1].value());
}
pub async fn test_kv_compare_and_put(kv_store: Arc<dyn KvBackend<Error = Error>>) {
pub async fn test_kv_compare_and_put(kv_backend: Arc<dyn KvBackend<Error = Error>>) {
let success = Arc::new(AtomicU8::new(0));
let mut joins = vec![];
for _ in 0..20 {
let kv_store_clone = kv_store.clone();
let kv_backend_clone = kv_backend.clone();
let success_clone = success.clone();
let join = tokio::spawn(async move {
let req = CompareAndPutRequest {
@@ -254,7 +263,7 @@ pub async fn test_kv_compare_and_put(kv_store: Arc<dyn KvBackend<Error = Error>>
expect: vec![],
value: b"val_new".to_vec(),
};
let resp = kv_store_clone.compare_and_put(req).await.unwrap();
let resp = kv_backend_clone.compare_and_put(req).await.unwrap();
if resp.success {
success_clone.fetch_add(1, Ordering::SeqCst);
}
@@ -269,20 +278,20 @@ pub async fn test_kv_compare_and_put(kv_store: Arc<dyn KvBackend<Error = Error>>
assert_eq!(1, success.load(Ordering::SeqCst));
}
pub async fn test_kv_delete_range(kv_store: impl KvBackend) {
pub async fn test_kv_delete_range(kv_backend: impl KvBackend) {
let req = DeleteRangeRequest {
key: b"key3".to_vec(),
range_end: vec![],
prev_kv: true,
};
let resp = kv_store.delete_range(req).await.unwrap();
let resp = kv_backend.delete_range(req).await.unwrap();
assert_eq!(1, resp.prev_kvs.len());
assert_eq!(1, resp.deleted);
assert_eq!(b"key3", resp.prev_kvs[0].key());
assert_eq!(b"val3", resp.prev_kvs[0].value());
let resp = kv_store.get(b"key3").await.unwrap();
let resp = kv_backend.get(b"key3").await.unwrap();
assert!(resp.is_none());
let req = DeleteRangeRequest {
@@ -291,11 +300,11 @@ pub async fn test_kv_delete_range(kv_store: impl KvBackend) {
prev_kv: false,
};
let resp = kv_store.delete_range(req).await.unwrap();
let resp = kv_backend.delete_range(req).await.unwrap();
assert_eq!(1, resp.deleted);
assert!(resp.prev_kvs.is_empty());
let resp = kv_store.get(b"key2").await.unwrap();
let resp = kv_backend.get(b"key2").await.unwrap();
assert!(resp.is_none());
let key = b"key1".to_vec();
@@ -306,7 +315,7 @@ pub async fn test_kv_delete_range(kv_store: impl KvBackend) {
range_end: range_end.clone(),
prev_kv: true,
};
let resp = kv_store.delete_range(req).await.unwrap();
let resp = kv_backend.delete_range(req).await.unwrap();
assert_eq!(2, resp.prev_kvs.len());
let req = RangeRequest {
@@ -314,19 +323,19 @@ pub async fn test_kv_delete_range(kv_store: impl KvBackend) {
range_end,
..Default::default()
};
let resp = kv_store.range(req).await.unwrap();
let resp = kv_backend.range(req).await.unwrap();
assert!(resp.kvs.is_empty());
}
pub async fn test_kv_batch_delete(kv_store: impl KvBackend) {
assert!(kv_store.get(b"key1").await.unwrap().is_some());
assert!(kv_store.get(b"key100").await.unwrap().is_none());
pub async fn test_kv_batch_delete(kv_backend: impl KvBackend) {
assert!(kv_backend.get(b"key1").await.unwrap().is_some());
assert!(kv_backend.get(b"key100").await.unwrap().is_none());
let req = BatchDeleteRequest {
keys: vec![b"key1".to_vec(), b"key100".to_vec()],
prev_kv: true,
};
let resp = kv_store.batch_delete(req).await.unwrap();
let resp = kv_backend.batch_delete(req).await.unwrap();
assert_eq!(1, resp.prev_kvs.len());
assert_eq!(
vec![KeyValue {
@@ -335,18 +344,18 @@ pub async fn test_kv_batch_delete(kv_store: impl KvBackend) {
}],
resp.prev_kvs
);
assert!(kv_store.get(b"key1").await.unwrap().is_none());
assert!(kv_backend.get(b"key1").await.unwrap().is_none());
assert!(kv_store.get(b"key2").await.unwrap().is_some());
assert!(kv_store.get(b"key3").await.unwrap().is_some());
assert!(kv_backend.get(b"key2").await.unwrap().is_some());
assert!(kv_backend.get(b"key3").await.unwrap().is_some());
let req = BatchDeleteRequest {
keys: vec![b"key2".to_vec(), b"key3".to_vec()],
prev_kv: false,
};
let resp = kv_store.batch_delete(req).await.unwrap();
let resp = kv_backend.batch_delete(req).await.unwrap();
assert!(resp.prev_kvs.is_empty());
assert!(kv_store.get(b"key2").await.unwrap().is_none());
assert!(kv_store.get(b"key3").await.unwrap().is_none());
assert!(kv_backend.get(b"key2").await.unwrap().is_none());
assert!(kv_backend.get(b"key3").await.unwrap().is_none());
}

View File

@@ -268,9 +268,9 @@ mod tests {
#[tokio::test]
async fn test_txn_one_compare_op() {
let kv_store = create_kv_store().await;
let kv_backend = create_kv_backend().await;
let _ = kv_store
let _ = kv_backend
.put(PutRequest {
key: vec![11],
value: vec![3],
@@ -288,7 +288,7 @@ mod tests {
.and_then(vec![TxnOp::Put(vec![11], vec![1])])
.or_else(vec![TxnOp::Put(vec![11], vec![2])]);
let txn_response = kv_store.txn(txn).await.unwrap();
let txn_response = kv_backend.txn(txn).await.unwrap();
assert!(txn_response.succeeded);
assert_eq!(txn_response.responses.len(), 1);
@@ -296,10 +296,10 @@ mod tests {
#[tokio::test]
async fn test_txn_multi_compare_op() {
let kv_store = create_kv_store().await;
let kv_backend = create_kv_backend().await;
for i in 1..3 {
let _ = kv_store
let _ = kv_backend
.put(PutRequest {
key: vec![i],
value: vec![i],
@@ -321,7 +321,7 @@ mod tests {
])
.or_else(vec![TxnOp::Put(vec![1], vec![11])]);
let txn_response = kv_store.txn(txn).await.unwrap();
let txn_response = kv_backend.txn(txn).await.unwrap();
assert!(txn_response.succeeded);
assert_eq!(txn_response.responses.len(), 2);
@@ -329,9 +329,9 @@ mod tests {
#[tokio::test]
async fn test_txn_compare_equal() {
let kv_store = create_kv_store().await;
let kv_backend = create_kv_backend().await;
let key = vec![101u8];
kv_store.delete(&key, false).await.unwrap();
kv_backend.delete(&key, false).await.unwrap();
let txn = Txn::new()
.when(vec![Compare::with_not_exist_value(
@@ -340,10 +340,10 @@ mod tests {
)])
.and_then(vec![TxnOp::Put(key.clone(), vec![1])])
.or_else(vec![TxnOp::Put(key.clone(), vec![2])]);
let txn_response = kv_store.txn(txn.clone()).await.unwrap();
let txn_response = kv_backend.txn(txn.clone()).await.unwrap();
assert!(txn_response.succeeded);
let txn_response = kv_store.txn(txn).await.unwrap();
let txn_response = kv_backend.txn(txn).await.unwrap();
assert!(!txn_response.succeeded);
let txn = Txn::new()
@@ -354,15 +354,15 @@ mod tests {
)])
.and_then(vec![TxnOp::Put(key.clone(), vec![3])])
.or_else(vec![TxnOp::Put(key, vec![4])]);
let txn_response = kv_store.txn(txn).await.unwrap();
let txn_response = kv_backend.txn(txn).await.unwrap();
assert!(txn_response.succeeded);
}
#[tokio::test]
async fn test_txn_compare_greater() {
let kv_store = create_kv_store().await;
let kv_backend = create_kv_backend().await;
let key = vec![102u8];
kv_store.delete(&key, false).await.unwrap();
kv_backend.delete(&key, false).await.unwrap();
let txn = Txn::new()
.when(vec![Compare::with_not_exist_value(
@@ -371,10 +371,10 @@ mod tests {
)])
.and_then(vec![TxnOp::Put(key.clone(), vec![1])])
.or_else(vec![TxnOp::Put(key.clone(), vec![2])]);
let txn_response = kv_store.txn(txn.clone()).await.unwrap();
let txn_response = kv_backend.txn(txn.clone()).await.unwrap();
assert!(!txn_response.succeeded);
let txn_response = kv_store.txn(txn).await.unwrap();
let txn_response = kv_backend.txn(txn).await.unwrap();
assert!(txn_response.succeeded);
let txn = Txn::new()
@@ -385,7 +385,7 @@ mod tests {
)])
.and_then(vec![TxnOp::Put(key.clone(), vec![3])])
.or_else(vec![TxnOp::Get(key.clone())]);
let mut txn_response = kv_store.txn(txn).await.unwrap();
let mut txn_response = kv_backend.txn(txn).await.unwrap();
assert!(!txn_response.succeeded);
let res = txn_response.responses.pop().unwrap();
assert_eq!(
@@ -402,9 +402,9 @@ mod tests {
#[tokio::test]
async fn test_txn_compare_less() {
let kv_store = create_kv_store().await;
let kv_backend = create_kv_backend().await;
let key = vec![103u8];
kv_store.delete(&[3], false).await.unwrap();
kv_backend.delete(&[3], false).await.unwrap();
let txn = Txn::new()
.when(vec![Compare::with_not_exist_value(
@@ -413,10 +413,10 @@ mod tests {
)])
.and_then(vec![TxnOp::Put(key.clone(), vec![1])])
.or_else(vec![TxnOp::Put(key.clone(), vec![2])]);
let txn_response = kv_store.txn(txn.clone()).await.unwrap();
let txn_response = kv_backend.txn(txn.clone()).await.unwrap();
assert!(!txn_response.succeeded);
let txn_response = kv_store.txn(txn).await.unwrap();
let txn_response = kv_backend.txn(txn).await.unwrap();
assert!(!txn_response.succeeded);
let txn = Txn::new()
@@ -427,7 +427,7 @@ mod tests {
)])
.and_then(vec![TxnOp::Put(key.clone(), vec![3])])
.or_else(vec![TxnOp::Get(key.clone())]);
let mut txn_response = kv_store.txn(txn).await.unwrap();
let mut txn_response = kv_backend.txn(txn).await.unwrap();
assert!(!txn_response.succeeded);
let res = txn_response.responses.pop().unwrap();
assert_eq!(
@@ -444,9 +444,9 @@ mod tests {
#[tokio::test]
async fn test_txn_compare_not_equal() {
let kv_store = create_kv_store().await;
let kv_backend = create_kv_backend().await;
let key = vec![104u8];
kv_store.delete(&key, false).await.unwrap();
kv_backend.delete(&key, false).await.unwrap();
let txn = Txn::new()
.when(vec![Compare::with_not_exist_value(
@@ -455,10 +455,10 @@ mod tests {
)])
.and_then(vec![TxnOp::Put(key.clone(), vec![1])])
.or_else(vec![TxnOp::Put(key.clone(), vec![2])]);
let txn_response = kv_store.txn(txn.clone()).await.unwrap();
let txn_response = kv_backend.txn(txn.clone()).await.unwrap();
assert!(!txn_response.succeeded);
let txn_response = kv_store.txn(txn).await.unwrap();
let txn_response = kv_backend.txn(txn).await.unwrap();
assert!(txn_response.succeeded);
let txn = Txn::new()
@@ -469,7 +469,7 @@ mod tests {
)])
.and_then(vec![TxnOp::Put(key.clone(), vec![3])])
.or_else(vec![TxnOp::Get(key.clone())]);
let mut txn_response = kv_store.txn(txn).await.unwrap();
let mut txn_response = kv_backend.txn(txn).await.unwrap();
assert!(!txn_response.succeeded);
let res = txn_response.responses.pop().unwrap();
assert_eq!(
@@ -484,7 +484,7 @@ mod tests {
);
}
async fn create_kv_store() -> KvBackendRef {
async fn create_kv_backend() -> KvBackendRef {
Arc::new(MemoryKvBackend::<Error>::new())
// TODO(jiachun): Add a feature to test against etcd in github CI
//

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(assert_matches)]
#![feature(btree_extract_if)]
#![feature(async_closure)]

View File

@@ -12,11 +12,42 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub const METRIC_META_TXN_REQUEST: &str = "meta.txn_request";
use lazy_static::lazy_static;
use prometheus::*;
pub(crate) const METRIC_META_CREATE_CATALOG: &str = "meta.create_catalog";
pub(crate) const METRIC_META_CREATE_SCHEMA: &str = "meta.create_schema";
pub(crate) const METRIC_META_PROCEDURE_CREATE_TABLE: &str = "meta.procedure.create_table";
pub(crate) const METRIC_META_PROCEDURE_DROP_TABLE: &str = "meta.procedure.drop_table";
pub(crate) const METRIC_META_PROCEDURE_ALTER_TABLE: &str = "meta.procedure.alter_table";
pub(crate) const METRIC_META_PROCEDURE_TRUNCATE_TABLE: &str = "meta.procedure.truncate_table";
lazy_static! {
pub static ref METRIC_META_TXN_REQUEST: HistogramVec =
register_histogram_vec!("meta_txn_request", "meta txn request", &["target", "op"]).unwrap();
pub static ref METRIC_META_CREATE_CATALOG: Histogram =
register_histogram!("meta_create_catalog", "meta create catalog").unwrap();
pub static ref METRIC_META_CREATE_CATALOG_COUNTER: IntCounter =
register_int_counter!("meta_create_catalog_counter", "meta create catalog").unwrap();
pub static ref METRIC_META_CREATE_SCHEMA: Histogram =
register_histogram!("meta_create_schema", "meta create schema").unwrap();
pub static ref METRIC_META_CREATE_SCHEMA_COUNTER: IntCounter =
register_int_counter!("meta_create_schema_counter", "meta create schema").unwrap();
pub static ref METRIC_META_PROCEDURE_CREATE_TABLE: HistogramVec = register_histogram_vec!(
"meta_procedure_create_table",
"meta procedure create table",
&["step"]
)
.unwrap();
pub static ref METRIC_META_PROCEDURE_DROP_TABLE: HistogramVec = register_histogram_vec!(
"meta_procedure_drop_table",
"meta procedure drop table",
&["step"]
)
.unwrap();
pub static ref METRIC_META_PROCEDURE_ALTER_TABLE: HistogramVec = register_histogram_vec!(
"meta_procedure_alter_table",
"meta procedure alter table",
&["step"]
)
.unwrap();
pub static ref METRIC_META_PROCEDURE_TRUNCATE_TABLE: HistogramVec = register_histogram_vec!(
"meta_procedure_truncate_table",
"meta procedure truncate table",
&["step"]
)
.unwrap();
}
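As a usage note, these `HistogramVec` metrics replace the old `timer!` macro; below is a minimal sketch of the observation pattern, mirroring the label values used by the kv backends above (the function name is illustrative):
use common_meta::metrics::METRIC_META_TXN_REQUEST;

fn observe_txn_duration() {
    // The timer records the elapsed seconds into the histogram when dropped.
    let _timer = METRIC_META_TXN_REQUEST
        .with_label_values(&["memory", "txn"])
        .start_timer();
    // ... execute the transaction while the timer is alive ...
}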

View File

@@ -49,6 +49,14 @@ impl Peer {
addr: addr.into(),
}
}
#[cfg(any(test, feature = "testing"))]
pub fn empty(id: u64) -> Self {
Self {
id,
addr: String::new(),
}
}
}
impl Display for Peer {

View File

@@ -17,10 +17,12 @@ use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use common_telemetry::debug;
use futures::future::BoxFuture;
use futures::{ready, FutureExt, Stream};
use snafu::ensure;
use crate::error::Result;
use crate::error::{self, Result};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::{RangeRequest, RangeResponse};
use crate::rpc::KeyValue;
@@ -39,7 +41,16 @@ enum PaginationStreamState<K, V> {
Error,
}
pub const DEFAULT_PAGE_SIZE: usize = 512;
/// The Range Request's default page size.
///
/// It depends on the upstream KvStore server's gRPC message size limit
/// (e.g., etcd's default gRPC message size limit is 4 MiB).
///
/// Generally, almost all metadata entries are smaller than 2700 bytes.
/// Therefore, we can statically set [DEFAULT_PAGE_SIZE] to 1536.
///
/// TODO(weny): Consider updating the default page size dynamically.
pub const DEFAULT_PAGE_SIZE: usize = 1536;
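// An illustrative back-of-the-envelope check (an assumption, not part of the change):
// with etcd's ~4 MiB gRPC message limit and entries of roughly 2700 bytes,
// about 4 * 1024 * 1024 / 2700 ≈ 1553 key-values fit in one response,
// so a page size of 1536 keeps a full page just under that bound.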
struct PaginationStreamFactory {
kv: KvBackendRef,
@@ -55,10 +66,13 @@ struct PaginationStreamFactory {
pub range_end: Vec<u8>,
/// page_size is the pagination page size.
pub page_size: usize,
page_size: usize,
/// keys_only when set returns only the keys and not the values.
pub keys_only: bool,
/// It reduces the page size if the response size exceeds the limit.
pub adaptive_page_size: usize,
pub more: bool,
}
@@ -78,19 +92,58 @@ impl PaginationStreamFactory {
page_size,
keys_only,
more,
adaptive_page_size: if page_size == 0 {
DEFAULT_ADAPTIVE_PAGE_SIZE
} else {
page_size
},
}
}
}
const DEFAULT_ADAPTIVE_PAGE_SIZE: usize = 1024;
impl PaginationStreamFactory {
pub async fn read_next(self) -> Result<(Self, Option<RangeResponse>)> {
fn try_reduce_adaptive_page_size(&mut self) -> Result<()> {
self.adaptive_page_size /= 2;
ensure!(
self.adaptive_page_size != 0,
error::UnexpectedSnafu {
err_msg: "Exceeded maximum number of adaptive range retries"
}
);
Ok(())
}
/// Decreases the page size if the response message size exceeds the limit.
/// TODO(weny): Consider adding an E2E test.
#[async_recursion::async_recursion]
async fn adaptive_range(&mut self, req: RangeRequest) -> Result<RangeResponse> {
match self.kv.range(req.clone()).await {
Ok(resp) => Ok(resp),
Err(err) => {
if err.is_exceeded_size_limit() {
self.try_reduce_adaptive_page_size()?;
debug!("Reset page_size to {}", self.adaptive_page_size);
self.adaptive_range(req.with_limit(self.adaptive_page_size as i64))
.await
} else {
Err(err)
}
}
}
}
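// Illustration (assuming a request that starts at the 1536 default above): on
// repeated "exceeded size limit" errors the requested limit halves as
// 1536 -> 768 -> 384 -> ... -> 3 -> 1, and the next reduction (to 0) aborts
// with an `Unexpected` error instead of retrying forever.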
pub async fn read_next(mut self) -> Result<(Self, Option<RangeResponse>)> {
if self.more {
let resp = self
.kv
.range(RangeRequest {
.adaptive_range(RangeRequest {
key: self.key.clone(),
range_end: self.range_end.clone(),
limit: self.page_size as i64,
limit: self.adaptive_page_size as i64,
keys_only: self.keys_only,
})
.await?;
@@ -111,6 +164,7 @@ impl PaginationStreamFactory {
page_size: self.page_size,
keys_only: self.keys_only,
more: resp.more,
adaptive_page_size: self.adaptive_page_size,
},
Some(resp),
))
@@ -214,6 +268,7 @@ impl<K, V> Stream for PaginationStream<K, V> {
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use std::collections::BTreeMap;
use futures::TryStreamExt;
@@ -228,12 +283,47 @@ mod tests {
Ok((kv.key.clone(), kv.value))
}
#[test]
fn test_try_reduce_page_size() {
let kv_backend = Arc::new(MemoryKvBackend::<Error>::new()) as _;
let mut factory =
PaginationStreamFactory::new(&kv_backend, vec![], vec![], 2, false, false);
// new adaptive page size: 1
factory.try_reduce_adaptive_page_size().unwrap();
// new adaptive page size: 0
assert_matches!(
factory.try_reduce_adaptive_page_size().unwrap_err(),
error::Error::Unexpected { .. }
);
let mut factory =
PaginationStreamFactory::new(&kv_backend, vec![], vec![], 1024, false, false);
factory.try_reduce_adaptive_page_size().unwrap();
assert_eq!(factory.adaptive_page_size, 512);
factory.try_reduce_adaptive_page_size().unwrap();
assert_eq!(factory.adaptive_page_size, 256);
let mut factory =
PaginationStreamFactory::new(&kv_backend, vec![], vec![], 0, false, false);
factory.try_reduce_adaptive_page_size().unwrap();
assert_eq!(factory.adaptive_page_size, DEFAULT_ADAPTIVE_PAGE_SIZE / 2);
}
#[tokio::test]
async fn test_range_empty() {
let kv_store = Arc::new(MemoryKvBackend::<Error>::new());
let kv_backend = Arc::new(MemoryKvBackend::<Error>::new());
let stream = PaginationStream::new(
kv_store.clone(),
kv_backend.clone(),
RangeRequest {
key: b"a".to_vec(),
..Default::default()
@@ -248,14 +338,14 @@ mod tests {
#[tokio::test]
async fn test_range() {
let kv_store = Arc::new(MemoryKvBackend::<Error>::new());
let kv_backend = Arc::new(MemoryKvBackend::<Error>::new());
let total = 26;
let mut expected = BTreeMap::<Vec<u8>, ()>::new();
for i in 0..total {
let key = vec![97 + i];
assert!(kv_store
assert!(kv_backend
.put(PutRequest {
key: key.clone(),
value: key.clone(),
@@ -271,7 +361,7 @@ mod tests {
let range_end = b"f".to_vec();
let stream = PaginationStream::new(
kv_store.clone(),
kv_backend.clone(),
RangeRequest {
key,
range_end,

View File

@@ -15,65 +15,19 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use api::v1::meta::{
Partition as PbPartition, Peer as PbPeer, Region as PbRegion, RegionRoute as PbRegionRoute,
RouteRequest as PbRouteRequest, RouteResponse as PbRouteResponse, Table as PbTable,
TableId as PbTableId, TableRoute as PbTableRoute, TableRouteValue as PbTableRouteValue,
Partition as PbPartition, Peer as PbPeer, Region as PbRegion, Table as PbTable,
TableRoute as PbTableRoute,
};
use serde::ser::SerializeSeq;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use snafu::OptionExt;
use store_api::storage::{RegionId, RegionNumber};
use table::metadata::TableId;
use crate::error::{self, Result};
use crate::key::RegionDistribution;
use crate::peer::Peer;
use crate::rpc::util;
use crate::table_name::TableName;
#[derive(Debug, Clone, Default)]
pub struct RouteRequest {
pub table_ids: Vec<TableId>,
}
impl From<RouteRequest> for PbRouteRequest {
fn from(mut req: RouteRequest) -> Self {
Self {
header: None,
table_ids: req.table_ids.drain(..).map(|id| PbTableId { id }).collect(),
}
}
}
impl RouteRequest {
#[inline]
pub fn new(table_id: TableId) -> Self {
Self {
table_ids: vec![table_id],
}
}
}
#[derive(Debug, Clone)]
pub struct RouteResponse {
pub table_routes: Vec<TableRoute>,
}
impl TryFrom<PbRouteResponse> for RouteResponse {
type Error = error::Error;
fn try_from(pb: PbRouteResponse) -> Result<Self> {
util::check_response_header(pb.header.as_ref())?;
let table_routes = pb
.table_routes
.into_iter()
.map(|x| TableRoute::try_from_raw(&pb.peers, x))
.collect::<Result<Vec<_>>>()?;
Ok(Self { table_routes })
}
}
pub fn region_distribution(region_routes: &[RegionRoute]) -> Result<RegionDistribution> {
let mut regions_id_map = RegionDistribution::new();
for route in region_routes.iter() {
@@ -104,7 +58,10 @@ pub fn find_leaders(region_routes: &[RegionRoute]) -> HashSet<Peer> {
.collect()
}
pub fn convert_to_region_map(region_routes: &[RegionRoute]) -> HashMap<u32, &Peer> {
/// Returns a HashMap<[RegionNumber], &[Peer]>.
///
/// If a [Region] doesn't have a leader peer, it is omitted from the map.
pub fn convert_to_region_leader_map(region_routes: &[RegionRoute]) -> HashMap<RegionNumber, &Peer> {
region_routes
.iter()
.filter_map(|x| {
@@ -115,7 +72,10 @@ pub fn convert_to_region_map(region_routes: &[RegionRoute]) -> HashMap<u32, &Pee
.collect::<HashMap<_, _>>()
}
pub fn find_region_leader(region_routes: &[RegionRoute], region_number: u32) -> Option<&Peer> {
pub fn find_region_leader(
region_routes: &[RegionRoute],
region_number: RegionNumber,
) -> Option<&Peer> {
region_routes
.iter()
.find(|x| x.region.id.region_number() == region_number)
@@ -194,112 +154,12 @@ impl TableRoute {
region,
leader_peer,
follower_peers,
leader_status: None,
});
}
Ok(Self::new(table, region_routes))
}
pub fn try_into_raw(self) -> Result<(Vec<PbPeer>, PbTableRoute)> {
let mut peers = HashSet::new();
self.region_routes
.iter()
.filter_map(|x| x.leader_peer.as_ref())
.for_each(|p| {
let _ = peers.insert(p.clone());
});
self.region_routes
.iter()
.flat_map(|x| x.follower_peers.iter())
.for_each(|p| {
let _ = peers.insert(p.clone());
});
let mut peers = peers.into_iter().map(Into::into).collect::<Vec<PbPeer>>();
peers.sort_by_key(|x| x.id);
let find_peer = |peer_id: u64| -> u64 {
peers
.iter()
.enumerate()
.find_map(|(i, x)| {
if x.id == peer_id {
Some(i as u64)
} else {
None
}
})
.unwrap_or_else(|| {
panic!("Peer {peer_id} must be present when collecting all peers.")
})
};
let mut region_routes = Vec::with_capacity(self.region_routes.len());
for region_route in self.region_routes.into_iter() {
let leader_peer_index = region_route.leader_peer.map(|x| find_peer(x.id)).context(
error::RouteInfoCorruptedSnafu {
err_msg: "'leader_peer' is empty in region route",
},
)?;
let follower_peer_indexes = region_route
.follower_peers
.iter()
.map(|x| find_peer(x.id))
.collect::<Vec<_>>();
region_routes.push(PbRegionRoute {
region: Some(region_route.region.into()),
leader_peer_index,
follower_peer_indexes,
});
}
let table_route = PbTableRoute {
table: Some(self.table.into()),
region_routes,
};
Ok((peers, table_route))
}
pub fn find_leaders(&self) -> HashSet<Peer> {
find_leaders(&self.region_routes)
}
pub fn find_leader_regions(&self, datanode: &Peer) -> Vec<RegionNumber> {
find_leader_regions(&self.region_routes, datanode)
}
pub fn find_region_leader(&self, region_number: RegionNumber) -> Option<&Peer> {
self.region_leaders
.get(&region_number)
.and_then(|x| x.as_ref())
}
}
impl TryFrom<PbTableRouteValue> for TableRoute {
type Error = error::Error;
fn try_from(pb: PbTableRouteValue) -> Result<Self> {
TableRoute::try_from_raw(
&pb.peers,
pb.table_route.context(error::InvalidProtoMsgSnafu {
err_msg: "expected table_route",
})?,
)
}
}
impl TryFrom<TableRoute> for PbTableRouteValue {
type Error = error::Error;
fn try_from(table_route: TableRoute) -> Result<Self> {
let (peers, table_route) = table_route.try_into_raw()?;
Ok(PbTableRouteValue {
peers,
table_route: Some(table_route),
})
}
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
@@ -343,17 +203,56 @@ pub struct RegionRoute {
pub region: Region,
pub leader_peer: Option<Peer>,
pub follower_peers: Vec<Peer>,
/// `None` by default.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub leader_status: Option<RegionStatus>,
}
/// The Status of the [Region].
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub enum RegionStatus {
/// The [Region] is downgraded in the following cases:
///
/// - The [Region] is unavailable (e.g., crashed, or the network is disconnected).
/// - The [Region] is planned to be migrated to another [Peer].
Downgraded,
}
impl RegionRoute {
/// Returns true if the Leader [Region] is downgraded.
///
/// The [Region] is downgraded in the following cases:
///
/// - The [Region] is unavailable (e.g., crashed, or the network is disconnected).
/// - The [Region] is planned to be migrated to another [Peer].
///
pub fn is_leader_downgraded(&self) -> bool {
matches!(self.leader_status, Some(RegionStatus::Downgraded))
}
/// Marks the Leader [Region] as downgraded.
///
/// We should downgrade a [Region] before deactivating it:
///
/// - During the [Region] failover procedure.
/// - When migrating a [Region].
///
/// **Note:** the Meta Server will stop renewing the lease for a downgraded [Region].
///
pub fn downgrade_leader(&mut self) {
self.leader_status = Some(RegionStatus::Downgraded)
}
}
pub struct RegionRoutes(pub Vec<RegionRoute>);
impl RegionRoutes {
pub fn region_map(&self) -> HashMap<u32, &Peer> {
convert_to_region_map(&self.0)
pub fn region_leader_map(&self) -> HashMap<RegionNumber, &Peer> {
convert_to_region_leader_map(&self.0)
}
pub fn find_region_leader(&self, region_number: u32) -> Option<&Peer> {
self.region_map().get(&region_number).copied()
pub fn find_region_leader(&self, region_number: RegionNumber) -> Option<&Peer> {
self.region_leader_map().get(&region_number).copied()
}
}
@@ -365,6 +264,16 @@ pub struct Region {
pub attrs: BTreeMap<String, String>,
}
impl Region {
#[cfg(any(test, feature = "testing"))]
pub fn new_test(id: RegionId) -> Self {
Self {
id,
..Default::default()
}
}
}
impl From<PbRegion> for Region {
fn from(r: PbRegion) -> Self {
Self {
@@ -456,14 +365,50 @@ impl From<PbPartition> for Partition {
#[cfg(test)]
mod tests {
use api::v1::meta::{
Partition as PbPartition, Peer as PbPeer, Region as PbRegion, RegionRoute as PbRegionRoute,
RouteRequest as PbRouteRequest, RouteResponse as PbRouteResponse, Table as PbTable,
TableName as PbTableName, TableRoute as PbTableRoute,
};
use super::*;
#[test]
fn test_leader_is_downgraded() {
let mut region_route = RegionRoute {
region: Region {
id: 2.into(),
name: "r2".to_string(),
partition: None,
attrs: BTreeMap::new(),
},
leader_peer: Some(Peer::new(1, "a1")),
follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
leader_status: None,
};
assert!(!region_route.is_leader_downgraded());
region_route.downgrade_leader();
assert!(region_route.is_leader_downgraded());
}
#[test]
fn test_region_route_decode() {
let region_route = RegionRoute {
region: Region {
id: 2.into(),
name: "r2".to_string(),
partition: None,
attrs: BTreeMap::new(),
},
leader_peer: Some(Peer::new(1, "a1")),
follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
leader_status: None,
};
let input = r#"{"region":{"id":2,"name":"r2","partition":null,"attrs":{}},"leader_peer":{"id":1,"addr":"a1"},"follower_peers":[{"id":2,"addr":"a2"},{"id":3,"addr":"a3"}]}"#;
let decoded: RegionRoute = serde_json::from_str(input).unwrap();
assert_eq!(decoded, region_route);
}
#[test]
fn test_de_serialize_partition() {
let p = Partition {
@@ -476,182 +421,4 @@ mod tests {
assert_eq!(got, p);
}
#[test]
fn test_route_request_trans() {
let req = RouteRequest {
table_ids: vec![1, 2],
};
let into_req: PbRouteRequest = req.into();
assert!(into_req.header.is_none());
assert_eq!(1, into_req.table_ids.get(0).unwrap().id);
assert_eq!(2, into_req.table_ids.get(1).unwrap().id);
}
#[test]
fn test_route_response_trans() {
let res = PbRouteResponse {
header: None,
peers: vec![
PbPeer {
id: 1,
addr: "peer1".to_string(),
},
PbPeer {
id: 2,
addr: "peer2".to_string(),
},
],
table_routes: vec![PbTableRoute {
table: Some(PbTable {
id: 1,
table_name: Some(PbTableName {
catalog_name: "c1".to_string(),
schema_name: "s1".to_string(),
table_name: "t1".to_string(),
}),
table_schema: b"schema".to_vec(),
}),
region_routes: vec![PbRegionRoute {
region: Some(PbRegion {
id: 1,
name: "region1".to_string(),
partition: Some(PbPartition {
column_list: vec![b"c1".to_vec(), b"c2".to_vec()],
value_list: vec![b"v1".to_vec(), b"v2".to_vec()],
}),
attrs: Default::default(),
}),
leader_peer_index: 0,
follower_peer_indexes: vec![1],
}],
}],
};
let res: RouteResponse = res.try_into().unwrap();
let mut table_routes = res.table_routes;
assert_eq!(1, table_routes.len());
let table_route = table_routes.remove(0);
let table = table_route.table;
assert_eq!(1, table.id);
assert_eq!("c1", table.table_name.catalog_name);
assert_eq!("s1", table.table_name.schema_name);
assert_eq!("t1", table.table_name.table_name);
let mut region_routes = table_route.region_routes;
assert_eq!(1, region_routes.len());
let region_route = region_routes.remove(0);
let region = region_route.region;
assert_eq!(1, region.id);
assert_eq!("region1", region.name);
let partition = region.partition.unwrap();
assert_eq!(vec![b"c1".to_vec(), b"c2".to_vec()], partition.column_list);
assert_eq!(vec![b"v1".to_vec(), b"v2".to_vec()], partition.value_list);
assert_eq!(1, region_route.leader_peer.as_ref().unwrap().id);
assert_eq!("peer1", region_route.leader_peer.as_ref().unwrap().addr);
assert_eq!(1, region_route.follower_peers.len());
assert_eq!(2, region_route.follower_peers.get(0).unwrap().id);
assert_eq!("peer2", region_route.follower_peers.get(0).unwrap().addr);
}
#[test]
fn test_table_route_raw_conversion() {
let raw_peers = vec![
PbPeer {
id: 1,
addr: "a1".to_string(),
},
PbPeer {
id: 2,
addr: "a2".to_string(),
},
PbPeer {
id: 3,
addr: "a3".to_string(),
},
];
// region distribution:
// region id => leader peer id + [follower peer id]
// 1 => 2 + [1, 3]
// 2 => 1 + [2, 3]
let raw_table_route = PbTableRoute {
table: Some(PbTable {
id: 1,
table_name: Some(PbTableName {
catalog_name: "c1".to_string(),
schema_name: "s1".to_string(),
table_name: "t1".to_string(),
}),
table_schema: vec![],
}),
region_routes: vec![
PbRegionRoute {
region: Some(PbRegion {
id: 1,
name: "r1".to_string(),
partition: None,
attrs: HashMap::new(),
}),
leader_peer_index: 1,
follower_peer_indexes: vec![0, 2],
},
PbRegionRoute {
region: Some(PbRegion {
id: 2,
name: "r2".to_string(),
partition: None,
attrs: HashMap::new(),
}),
leader_peer_index: 0,
follower_peer_indexes: vec![1, 2],
},
],
};
let table_route = TableRoute {
table: Table {
id: 1,
table_name: TableName::new("c1", "s1", "t1"),
table_schema: vec![],
},
region_routes: vec![
RegionRoute {
region: Region {
id: 1.into(),
name: "r1".to_string(),
partition: None,
attrs: BTreeMap::new(),
},
leader_peer: Some(Peer::new(2, "a2")),
follower_peers: vec![Peer::new(1, "a1"), Peer::new(3, "a3")],
},
RegionRoute {
region: Region {
id: 2.into(),
name: "r2".to_string(),
partition: None,
attrs: BTreeMap::new(),
},
leader_peer: Some(Peer::new(1, "a1")),
follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
},
],
region_leaders: HashMap::from([
(2, Some(Peer::new(1, "a1"))),
(1, Some(Peer::new(2, "a2"))),
]),
};
let from_raw = TableRoute::try_from_raw(&raw_peers, raw_table_route.clone()).unwrap();
assert_eq!(from_raw, table_route);
let into_raw = table_route.try_into_raw().unwrap();
assert_eq!(into_raw.0, raw_peers);
assert_eq!(into_raw.1, raw_table_route);
}
}

View File

@@ -21,8 +21,7 @@ use api::v1::meta::{
BatchPutRequest as PbBatchPutRequest, BatchPutResponse as PbBatchPutResponse,
CompareAndPutRequest as PbCompareAndPutRequest,
CompareAndPutResponse as PbCompareAndPutResponse, DeleteRangeRequest as PbDeleteRangeRequest,
DeleteRangeResponse as PbDeleteRangeResponse, MoveValueRequest as PbMoveValueRequest,
MoveValueResponse as PbMoveValueResponse, PutRequest as PbPutRequest,
DeleteRangeResponse as PbDeleteRangeResponse, PutRequest as PbPutRequest,
PutResponse as PbPutResponse, RangeRequest as PbRangeRequest, RangeResponse as PbRangeResponse,
ResponseHeader as PbResponseHeader,
};
@@ -794,71 +793,6 @@ impl DeleteRangeResponse {
}
}
#[derive(Debug, Clone, Default)]
pub struct MoveValueRequest {
/// If from_key does not exist, return the value of to_key (if it exists).
/// If from_key exists, move the value of from_key to to_key (i.e. rename),
/// and return the value.
pub from_key: Vec<u8>,
pub to_key: Vec<u8>,
}
impl From<MoveValueRequest> for PbMoveValueRequest {
fn from(req: MoveValueRequest) -> Self {
Self {
header: None,
from_key: req.from_key,
to_key: req.to_key,
}
}
}
impl From<PbMoveValueRequest> for MoveValueRequest {
fn from(value: PbMoveValueRequest) -> Self {
Self {
from_key: value.from_key,
to_key: value.to_key,
}
}
}
impl MoveValueRequest {
#[inline]
pub fn new(from_key: impl Into<Vec<u8>>, to_key: impl Into<Vec<u8>>) -> Self {
Self {
from_key: from_key.into(),
to_key: to_key.into(),
}
}
}
#[derive(Debug, Clone)]
pub struct MoveValueResponse(pub Option<KeyValue>);
impl TryFrom<PbMoveValueResponse> for MoveValueResponse {
type Error = error::Error;
fn try_from(pb: PbMoveValueResponse) -> Result<Self> {
util::check_response_header(pb.header.as_ref())?;
Ok(Self(pb.kv.map(KeyValue::new)))
}
}
impl MoveValueResponse {
pub fn to_proto_resp(self, header: PbResponseHeader) -> PbMoveValueResponse {
PbMoveValueResponse {
header: Some(header),
kv: self.0.map(Into::into),
}
}
#[inline]
pub fn take_kv(&mut self) -> Option<KeyValue> {
self.0.take()
}
}
#[cfg(test)]
mod tests {
use api::v1::meta::{
@@ -866,10 +800,8 @@ mod tests {
CompareAndPutRequest as PbCompareAndPutRequest,
CompareAndPutResponse as PbCompareAndPutResponse,
DeleteRangeRequest as PbDeleteRangeRequest, DeleteRangeResponse as PbDeleteRangeResponse,
KeyValue as PbKeyValue, MoveValueRequest as PbMoveValueRequest,
MoveValueResponse as PbMoveValueResponse, PutRequest as PbPutRequest,
PutResponse as PbPutResponse, RangeRequest as PbRangeRequest,
RangeResponse as PbRangeResponse,
KeyValue as PbKeyValue, PutRequest as PbPutRequest, PutResponse as PbPutResponse,
RangeRequest as PbRangeRequest, RangeResponse as PbRangeResponse,
};
use super::*;
@@ -1172,34 +1104,4 @@ mod tests {
assert_eq!(b"v2".to_vec(), kv1.value().to_vec());
assert_eq!(b"v2".to_vec(), kv1.take_value());
}
#[test]
fn test_move_value_request_trans() {
let (from_key, to_key) = (b"test_key1".to_vec(), b"test_key2".to_vec());
let req = MoveValueRequest::new(from_key.clone(), to_key.clone());
let into_req: PbMoveValueRequest = req.into();
assert!(into_req.header.is_none());
assert_eq!(from_key, into_req.from_key);
assert_eq!(to_key, into_req.to_key);
}
#[test]
fn test_move_value_response_trans() {
let pb_res = PbMoveValueResponse {
header: None,
kv: Some(PbKeyValue {
key: b"k1".to_vec(),
value: b"v1".to_vec(),
}),
};
let mut res: MoveValueResponse = pb_res.try_into().unwrap();
let mut kv = res.take_kv().unwrap();
assert_eq!(b"k1".to_vec(), kv.key().to_vec());
assert_eq!(b"k1".to_vec(), kv.take_key());
assert_eq!(b"v1".to_vec(), kv.value().to_vec());
assert_eq!(b"v1".to_vec(), kv.take_value());
}
}

View File

@@ -166,15 +166,14 @@ mod tests {
use crate::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse,
BatchPutRequest, BatchPutResponse, CompareAndPutResponse, DeleteRangeRequest,
DeleteRangeResponse, MoveValueRequest, MoveValueResponse, PutRequest, PutResponse,
RangeRequest, RangeResponse,
DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
};
#[tokio::test]
async fn test_sequence() {
let kv_store = Arc::new(MemoryKvBackend::default());
let kv_backend = Arc::new(MemoryKvBackend::default());
let initial = 1024;
let seq = Sequence::new("test_seq", initial, 10, kv_store);
let seq = Sequence::new("test_seq", initial, 10, kv_backend);
for i in initial..initial + 100 {
assert_eq!(i, seq.next().await.unwrap());
@@ -183,9 +182,9 @@ mod tests {
#[tokio::test]
async fn test_sequence_out_of_rage() {
let kv_store = Arc::new(MemoryKvBackend::default());
let kv_backend = Arc::new(MemoryKvBackend::default());
let initial = u64::MAX - 10;
let seq = Sequence::new("test_seq", initial, 10, kv_store);
let seq = Sequence::new("test_seq", initial, 10, kv_backend);
for _ in 0..10 {
let _ = seq.next().await.unwrap();
@@ -247,14 +246,10 @@ mod tests {
async fn batch_delete(&self, _: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
unreachable!()
}
async fn move_value(&self, _: MoveValueRequest) -> Result<MoveValueResponse> {
unreachable!()
}
}
let kv_store = Arc::new(Noop {});
let seq = Sequence::new("test_seq", 0, 10, kv_store);
let kv_backend = Arc::new(Noop {});
let seq = Sequence::new("test_seq", 0, 10, kv_backend);
let next = seq.next().await;
assert!(next.is_err());

View File

@@ -1047,6 +1047,6 @@ mod tests {
// Run the runner and execute the procedure.
runner.run().await;
let err = meta.state().error().unwrap().output_msg();
assert!(err.contains("Internal error"), "{err}");
assert!(err.contains("subprocedure failed"), "{err}");
}
}

View File

@@ -33,7 +33,7 @@ impl ColumnarValue {
match self {
ColumnarValue::Vector(vector) => vector.data_type(),
ColumnarValue::Scalar(scalar_value) => {
ConcreteDataType::from_arrow_type(&scalar_value.get_datatype())
ConcreteDataType::from_arrow_type(&scalar_value.data_type())
}
}
}

View File

@@ -43,7 +43,7 @@ pub trait RecordBatchStream: Stream<Item = Result<RecordBatch>> {
pub type SendableRecordBatchStream = Pin<Box<dyn RecordBatchStream + Send>>;
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OrderOption {
pub name: String,
pub options: SortOptions,

View File

@@ -9,9 +9,10 @@ async-trait.workspace = true
common-error = { workspace = true }
common-macro = { workspace = true }
common-telemetry = { workspace = true }
metrics.workspace = true
lazy_static.workspace = true
once_cell.workspace = true
paste.workspace = true
prometheus.workspace = true
snafu.workspace = true
tokio-util.workspace = true
tokio.workspace = true

View File

@@ -13,6 +13,22 @@
// limitations under the License.
//! Runtime metrics
pub const THREAD_NAME_LABEL: &str = "thread.name";
pub const METRIC_RUNTIME_THREADS_ALIVE: &str = "runtime.threads.alive";
pub const METRIC_RUNTIME_THREADS_IDLE: &str = "runtime.threads.idle";
use lazy_static::lazy_static;
use prometheus::*;
pub const THREAD_NAME_LABEL: &str = "thread_name";
lazy_static! {
pub static ref METRIC_RUNTIME_THREADS_ALIVE: IntGaugeVec = register_int_gauge_vec!(
"runtime_threads_alive",
"runtime threads alive",
&[THREAD_NAME_LABEL]
)
.unwrap();
pub static ref METRIC_RUNTIME_THREADS_IDLE: IntGaugeVec = register_int_gauge_vec!(
"runtime_threads_idle",
"runtime threads idle",
&[THREAD_NAME_LABEL]
)
.unwrap();
}

View File

@@ -18,7 +18,6 @@ use std::sync::Arc;
use std::thread;
use std::time::Duration;
use metrics::{decrement_gauge, increment_gauge};
use snafu::ResultExt;
use tokio::runtime::{Builder as RuntimeBuilder, Handle};
use tokio::sync::oneshot;
@@ -172,29 +171,33 @@ impl Builder {
fn on_thread_start(thread_name: String) -> impl Fn() + 'static {
move || {
let labels = [(THREAD_NAME_LABEL, thread_name.clone())];
increment_gauge!(METRIC_RUNTIME_THREADS_ALIVE, 1.0, &labels);
METRIC_RUNTIME_THREADS_ALIVE
.with_label_values(&[thread_name.as_str()])
.inc();
}
}
fn on_thread_stop(thread_name: String) -> impl Fn() + 'static {
move || {
let labels = [(THREAD_NAME_LABEL, thread_name.clone())];
decrement_gauge!(METRIC_RUNTIME_THREADS_ALIVE, 1.0, &labels);
METRIC_RUNTIME_THREADS_ALIVE
.with_label_values(&[thread_name.as_str()])
.dec();
}
}
fn on_thread_park(thread_name: String) -> impl Fn() + 'static {
move || {
let labels = [(THREAD_NAME_LABEL, thread_name.clone())];
increment_gauge!(METRIC_RUNTIME_THREADS_IDLE, 1.0, &labels);
METRIC_RUNTIME_THREADS_IDLE
.with_label_values(&[thread_name.as_str()])
.inc();
}
}
fn on_thread_unpark(thread_name: String) -> impl Fn() + 'static {
move || {
let labels = [(THREAD_NAME_LABEL, thread_name.clone())];
decrement_gauge!(METRIC_RUNTIME_THREADS_IDLE, 1.0, &labels);
METRIC_RUNTIME_THREADS_IDLE
.with_label_values(&[thread_name.as_str()])
.dec();
}
}
@@ -204,14 +207,13 @@ mod tests {
use std::thread;
use std::time::Duration;
use common_telemetry::metric;
use common_telemetry::dump_metrics;
use tokio::sync::oneshot;
use tokio_test::assert_ok;
use super::*;
fn runtime() -> Arc<Runtime> {
common_telemetry::init_default_metrics_recorder();
let runtime = Builder::default()
.worker_threads(2)
.thread_name("test_spawn_join")
@@ -221,7 +223,6 @@ mod tests {
#[test]
fn test_metric() {
common_telemetry::init_default_metrics_recorder();
let runtime = Builder::default()
.worker_threads(5)
.thread_name("test_runtime_metric")
@@ -236,8 +237,7 @@ mod tests {
thread::sleep(Duration::from_millis(10));
let handle = metric::try_handle().unwrap();
let metric_text = handle.render();
let metric_text = dump_metrics().unwrap();
assert!(metric_text.contains("runtime_threads_idle{thread_name=\"test_runtime_metric\"}"));
assert!(metric_text.contains("runtime_threads_alive{thread_name=\"test_runtime_metric\"}"));

View File

@@ -27,7 +27,7 @@ table = { workspace = true }
[dependencies.substrait_proto]
package = "substrait"
version = "0.12"
version = "0.17"
[dev-dependencies]
datatypes = { workspace = true }

View File

@@ -47,9 +47,9 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor {
schema: &str,
) -> Result<Self::Plan, Self::Error> {
let state_config = SessionConfig::new().with_default_catalog_and_schema(catalog, schema);
let state = SessionState::with_config_rt(state_config, Arc::new(RuntimeEnv::default()))
let state = SessionState::new_with_config_rt(state_config, Arc::new(RuntimeEnv::default()))
.with_serializer_registry(Arc::new(ExtensionSerializer));
let mut context = SessionContext::with_state(state);
let mut context = SessionContext::new_with_state(state);
context.register_catalog_list(catalog_list);
let plan = Plan::decode(message).context(DecodeRelSnafu)?;
let df_plan = from_substrait_plan(&mut context, &plan)
@@ -61,9 +61,9 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor {
fn encode(&self, plan: &Self::Plan) -> Result<Bytes, Self::Error> {
let mut buf = BytesMut::new();
let session_state =
SessionState::with_config_rt(SessionConfig::new(), Arc::new(RuntimeEnv::default()))
SessionState::new_with_config_rt(SessionConfig::new(), Arc::new(RuntimeEnv::default()))
.with_serializer_registry(Arc::new(ExtensionSerializer));
let context = SessionContext::with_state(session_state);
let context = SessionContext::new_with_state(session_state);
let substrait_plan = to_substrait_plan(plan, &context).context(EncodeDfPlanSnafu)?;
substrait_plan.encode(&mut buf).context(EncodeRelSnafu)?;

View File

@@ -12,9 +12,7 @@ deadlock_detection = ["parking_lot/deadlock_detection"]
backtrace = "0.3"
common-error = { workspace = true }
console-subscriber = { version = "0.1", optional = true }
metrics-exporter-prometheus = { version = "0.11", default-features = false }
metrics-util = "0.14"
metrics.workspace = true
lazy_static.workspace = true
once_cell.workspace = true
opentelemetry = { version = "0.17", default-features = false, features = [
"trace",
@@ -22,6 +20,7 @@ opentelemetry = { version = "0.17", default-features = false, features = [
] }
opentelemetry-jaeger = { version = "0.16", features = ["rt-tokio"] }
parking_lot = { version = "0.12" }
prometheus.workspace = true
rand.workspace = true
rs-snowflake = "0.6"
serde.workspace = true

View File

@@ -21,7 +21,7 @@ use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
pub use logging::{init_default_ut_logging, init_global_logging, trace_id, TRACE_ID};
pub use metric::init_default_metrics_recorder;
pub use metric::dump_metrics;
use once_cell::sync::OnceCell;
pub use panic_hook::set_panic_hook;
use parking_lot::Mutex;

View File

@@ -14,174 +14,14 @@
// metric stuffs, inspired by databend
use std::fmt;
use std::sync::{Arc, Once, RwLock};
use std::time::{Duration, Instant};
use prometheus::{Encoder, TextEncoder};
use metrics::{register_histogram, Histogram, IntoLabels};
use metrics_exporter_prometheus::PrometheusBuilder;
pub use metrics_exporter_prometheus::PrometheusHandle;
use metrics_util::layers::{Layer, PrefixLayer};
use once_cell::sync::Lazy;
static PROMETHEUS_HANDLE: Lazy<Arc<RwLock<Option<PrometheusHandle>>>> =
Lazy::new(|| Arc::new(RwLock::new(None)));
pub fn init_default_metrics_recorder() {
static START: Once = Once::new();
START.call_once(init_prometheus_recorder)
}
/// Init prometheus recorder.
fn init_prometheus_recorder() {
let recorder = PrometheusBuilder::new().build_recorder();
let mut h = PROMETHEUS_HANDLE.as_ref().write().unwrap();
*h = Some(recorder.handle());
// TODO(LFC): separate metrics for testing and metrics for production
// `clear_recorder` is likely not expected to be called in production code, recorder should be
// globally unique and used throughout the whole lifetime of an application.
// It's marked as "unsafe" since [this PR](https://github.com/metrics-rs/metrics/pull/302), and
// the "metrics" version was also upgraded to 0.19.
// A quick look at the metrics code suggests that the "unsafe" call is harmless. However,
// it requires further investigation into how to use metrics properly.
unsafe {
metrics::clear_recorder();
}
let layer = PrefixLayer::new("greptime");
let layered = layer.layer(recorder);
match metrics::set_boxed_recorder(Box::new(layered)) {
Ok(_) => (),
Err(err) => crate::warn!("Install prometheus recorder failed, cause: {}", err),
};
}
pub fn try_handle() -> Option<PrometheusHandle> {
PROMETHEUS_HANDLE.as_ref().read().unwrap().clone()
}
/// A Histogram timer that emits the elapsed time to the histogram on drop.
#[must_use = "Timer should be kept in a variable otherwise it cannot observe duration"]
pub struct Timer {
start: Instant,
histogram: Histogram,
observed: bool,
}
impl From<Histogram> for Timer {
fn from(histogram: Histogram) -> Timer {
Timer::from_histogram(histogram)
}
}
impl fmt::Debug for Timer {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Timer")
.field("start", &self.start)
.field("observed", &self.observed)
.finish()
}
}
impl Timer {
/// Creates a timer from given histogram.
pub fn from_histogram(histogram: Histogram) -> Self {
Self {
start: Instant::now(),
histogram,
observed: false,
}
}
/// Creates a timer from given `name`.
pub fn new(name: &'static str) -> Self {
Self {
start: Instant::now(),
histogram: register_histogram!(name),
observed: false,
}
}
/// Creates a timer from given `name` and `labels`.
pub fn new_with_labels<L: IntoLabels>(name: &'static str, labels: L) -> Self {
Self {
start: Instant::now(),
histogram: register_histogram!(name, labels),
observed: false,
}
}
/// Returns the elapsed duration from the time this timer created.
pub fn elapsed(&self) -> Duration {
self.start.elapsed()
}
/// Discards the timer result.
pub fn discard(mut self) {
self.observed = true;
}
}
impl Drop for Timer {
fn drop(&mut self) {
if !self.observed {
self.histogram.record(self.elapsed())
}
}
}
#[macro_export]
macro_rules! timer {
($name: expr) => {
$crate::metric::Timer::new($name)
};
($name:expr, $labels:expr) => {
$crate::metric::Timer::new_with_labels($name, $labels)
};
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_elapsed_timer() {
init_default_metrics_recorder();
{
let _t = timer!("test_elapsed_timer_a");
}
let handle = try_handle().unwrap();
let text = handle.render();
assert!(text.contains("test_elapsed_timer_a"));
assert!(!text.contains("test_elapsed_timer_b"));
let _ = timer!("test_elapsed_timer_b");
let text = handle.render();
assert!(text.contains("test_elapsed_timer_a"));
assert!(text.contains("test_elapsed_timer_b"));
}
#[test]
fn test_elapsed_timer_with_label() {
init_default_metrics_recorder();
{
let _t = timer!("test_elapsed_timer_a");
}
let handle = try_handle().unwrap();
let text = handle.render();
assert!(text.contains("test_elapsed_timer_a"));
assert!(!text.contains("test_elapsed_timer_b"));
let label_a = "label_a";
let label_b = "label_b";
assert!(!text.contains(label_a));
assert!(!text.contains(label_b));
{
let _t = timer!("test_elapsed_timer_b", &[(label_a, "a"), (label_b, "b")]);
}
let text = handle.render();
assert!(text.contains("test_elapsed_timer_a"));
assert!(text.contains("test_elapsed_timer_b"));
assert!(text.contains(label_a));
assert!(text.contains(label_b));
}
pub fn dump_metrics() -> Result<String, String> {
let mut buffer = Vec::new();
let encoder = TextEncoder::new();
let metric_families = prometheus::gather();
encoder
.encode(&metric_families, &mut buffer)
.map_err(|_| "Encode metrics failed".to_string())?;
String::from_utf8(buffer).map_err(|e| e.to_string())
}
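A self-contained sketch of the prometheus-based pattern that replaces the removed Timer type and `timer!` macro, using a hypothetical metric name; it relies only on the `register_histogram_vec!`, `with_label_values` and `start_timer` calls that appear elsewhere in this diff:

use lazy_static::lazy_static;
use prometheus::{register_histogram_vec, HistogramVec};

lazy_static! {
    // Hypothetical metric, registered once against the default prometheus registry.
    static ref HANDLE_REQUEST_ELAPSED: HistogramVec = register_histogram_vec!(
        "example_handle_request_elapsed",
        "elapsed time of handling a request",
        &["request_type"]
    )
    .unwrap();
}

fn handle_request() {
    // The returned HistogramTimer observes the elapsed duration when it is dropped,
    // which is what the removed Timer struct and `timer!` macro used to do.
    let _timer = HANDLE_REQUEST_ELAPSED
        .with_label_values(&["create"])
        .start_timer();
    // ... actual request handling ...
}

The gathered metrics can then be rendered through the new `dump_metrics` shown above instead of the old `PrometheusHandle`.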

View File

@@ -17,7 +17,13 @@ use std::panic;
use std::time::Duration;
use backtrace::Backtrace;
use metrics::increment_counter;
use lazy_static::lazy_static;
use prometheus::*;
lazy_static! {
pub static ref PANIC_COUNTER: IntCounter =
register_int_counter!("panic_counter", "panic_counter").unwrap();
}
pub fn set_panic_hook() {
// Set a panic hook that records the panic as a `tracing` event at the
@@ -41,7 +47,7 @@ pub fn set_panic_hook() {
} else {
tracing::error!(message = %panic, backtrace = %backtrace);
}
increment_counter!("panic_counter");
PANIC_COUNTER.inc();
default_hook(panic);
}));

View File

@@ -408,11 +408,6 @@ mod tests {
Time::new(1, TimeUnit::Millisecond)
.to_timezone_aware_string(TimeZone::from_tz_string("UTC").unwrap())
);
assert_eq!(
"02:00:00.001",
Time::new(1, TimeUnit::Millisecond)
.to_timezone_aware_string(TimeZone::from_tz_string("Europe/Berlin").unwrap())
);
assert_eq!(
"03:00:00.001",
Time::new(1, TimeUnit::Millisecond)

View File

@@ -21,7 +21,7 @@ use std::time::Duration;
use arrow::datatypes::TimeUnit as ArrowTimeUnit;
use chrono::{
DateTime, LocalResult, NaiveDate, NaiveDateTime, NaiveTime, TimeZone as ChronoTimeZone, Utc,
DateTime, LocalResult, NaiveDate, NaiveDateTime, NaiveTime, TimeZone as ChronoTimeZone,
};
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
@@ -63,7 +63,7 @@ impl Timestamp {
TimeUnit::Second => now.timestamp(),
TimeUnit::Millisecond => now.timestamp_millis(),
TimeUnit::Microsecond => now.timestamp_micros(),
TimeUnit::Nanosecond => now.timestamp_nanos(),
TimeUnit::Nanosecond => now.timestamp_nanos_opt().unwrap_or_default(),
};
Timestamp { value, unit }
}
@@ -182,7 +182,7 @@ impl Timestamp {
/// Split a [Timestamp] into seconds part and nanoseconds part.
/// Note that the seconds part of the split result is always rounded down (floored).
fn split(&self) -> (i64, u32) {
pub fn split(&self) -> (i64, u32) {
let sec_mul = (TimeUnit::Second.factor() / self.unit.factor()) as i64;
let nsec_mul = (self.unit.factor() / TimeUnit::Nanosecond.factor()) as i64;
@@ -287,6 +287,7 @@ impl FromStr for Timestamp {
/// - `2022-09-20 14:16:43.012345Z` (Zulu timezone, without T)
/// - `2022-09-20 14:16:43` (local timezone, without T)
/// - `2022-09-20 14:16:43.012345` (local timezone, without T)
#[allow(deprecated)]
fn from_str(s: &str) -> Result<Self, Self::Err> {
// RFC3339 timestamp (with a T)
let s = s.trim();
@@ -298,7 +299,7 @@ impl FromStr for Timestamp {
return Timestamp::from_chrono_datetime(ts.naive_utc())
.context(ParseTimestampSnafu { raw: s });
}
if let Ok(ts) = Utc.datetime_from_str(s, "%Y-%m-%d %H:%M:%S%.fZ") {
if let Ok(ts) = chrono::Utc.datetime_from_str(s, "%Y-%m-%d %H:%M:%S%.fZ") {
return Timestamp::from_chrono_datetime(ts.naive_utc())
.context(ParseTimestampSnafu { raw: s });
}
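The chrono upgrade behind the `timestamp_nanos` change replaces the panicking accessor with an `Option`-returning one, and the old `datetime_from_str` parser is only kept behind `#[allow(deprecated)]`. A minimal sketch, assuming chrono 0.4.31 or later:

use chrono::{NaiveDateTime, TimeZone, Utc};

fn now_nanos() -> i64 {
    // `timestamp_nanos()` is deprecated because it panics for dates outside roughly
    // 1677..=2262; `timestamp_nanos_opt()` returns None instead, so fall back to 0.
    Utc::now().timestamp_nanos_opt().unwrap_or_default()
}

#[allow(deprecated)]
fn parse_zulu_without_t(s: &str) -> Option<NaiveDateTime> {
    // `TimeZone::datetime_from_str` is deprecated upstream; the diff keeps using it
    // rather than migrating the parsing code.
    Utc.datetime_from_str(s, "%Y-%m-%d %H:%M:%S%.fZ")
        .ok()
        .map(|dt| dt.naive_utc())
}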

View File

@@ -4,6 +4,9 @@ version.workspace = true
edition.workspace = true
license.workspace = true
[features]
testing = []
[dependencies]
api = { workspace = true }
arrow-flight.workspace = true
@@ -41,12 +44,13 @@ futures = "0.3"
futures-util.workspace = true
humantime-serde.workspace = true
hyper = { version = "0.14", features = ["full"] }
lazy_static.workspace = true
log-store = { workspace = true }
meta-client = { workspace = true }
metrics.workspace = true
mito2 = { workspace = true }
object-store = { workspace = true }
pin-project = "1.0"
prometheus.workspace = true
prost.workspace = true
query = { workspace = true }
reqwest = { workspace = true }

View File

@@ -33,6 +33,7 @@ use futures_util::StreamExt;
use log_store::raft_engine::log_store::RaftEngineLogStore;
use meta_client::client::MetaClient;
use mito2::engine::MitoEngine;
use object_store::manager::{ObjectStoreManager, ObjectStoreManagerRef};
use object_store::util::normalize_dir;
use query::QueryEngineFactory;
use servers::Mode;
@@ -354,7 +355,12 @@ impl DatanodeBuilder {
let mut region_server =
RegionServer::new(query_engine.clone(), runtime.clone(), event_listener);
let object_store = store::new_object_store(opts).await?;
let engines = Self::build_store_engines(opts, log_store, object_store).await?;
let object_store_manager = ObjectStoreManager::new(
"default", // TODO: use a name which is set in the configuration when #919 is done.
object_store,
);
let engines =
Self::build_store_engines(opts, log_store, Arc::new(object_store_manager)).await?;
for engine in engines {
region_server.register_engine(engine);
}
@@ -392,7 +398,7 @@ impl DatanodeBuilder {
async fn build_store_engines<S>(
opts: &DatanodeOptions,
log_store: Arc<S>,
object_store: object_store::ObjectStore,
object_store_manager: ObjectStoreManagerRef,
) -> Result<Vec<RegionEngineRef>>
where
S: LogStore,
@@ -401,12 +407,18 @@ impl DatanodeBuilder {
for engine in &opts.region_engine {
match engine {
RegionEngineConfig::Mito(config) => {
let engine: MitoEngine =
MitoEngine::new(config.clone(), log_store.clone(), object_store.clone());
let engine: MitoEngine = MitoEngine::new(
config.clone(),
log_store.clone(),
object_store_manager.clone(),
);
engines.push(Arc::new(engine) as _);
}
RegionEngineConfig::File(config) => {
let engine = FileRegionEngine::new(config.clone(), object_store.clone());
let engine = FileRegionEngine::new(
config.clone(),
object_store_manager.default_object_store().clone(), // TODO: implement custom storage for file engine
);
engines.push(Arc::new(engine) as _);
}
}

View File

@@ -463,7 +463,6 @@ impl ErrorExt for Error {
ColumnNotFound { .. } => StatusCode::TableColumnNotFound,
ParseSql { source, .. } => source.status_code(),
DeleteExprToRequest { source, .. } | InsertData { source, .. } => source.status_code(),
ColumnValuesNumberMismatch { .. }

View File

@@ -16,7 +16,7 @@ use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use api::v1::meta::{HeartbeatRequest, Peer, RegionStat, Role};
use api::v1::meta::{HeartbeatRequest, Peer, RegionRole, RegionStat, Role};
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::distributed_time_constants::META_KEEP_ALIVE_INTERVAL_SECS;
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
@@ -280,15 +280,26 @@ impl HeartbeatTask {
async fn load_region_stats(region_server: &RegionServer) -> Vec<RegionStat> {
let regions = region_server.opened_regions();
regions
.into_iter()
.map(|(region_id, engine)| RegionStat {
region_id: region_id.as_u64(),
engine,
let mut region_stats = Vec::new();
for stat in regions {
let approximate_bytes = region_server
.region_disk_usage(stat.region_id)
.await
.unwrap_or(0);
let region_stat = RegionStat {
region_id: stat.region_id.as_u64(),
engine: stat.engine,
role: RegionRole::from(stat.role).into(),
approximate_bytes,
// TODO(ruihang): scratch more info
..Default::default()
})
.collect::<Vec<_>>()
rcus: 0,
wcus: 0,
approximate_rows: 0,
};
region_stats.push(region_stat);
}
region_stats
}
pub async fn close(&self) -> Result<()> {

View File

@@ -12,9 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! datanode metrics
use lazy_static::lazy_static;
use prometheus::*;
/// The elapsed time of handling a request in the region_server.
pub const HANDLE_REGION_REQUEST_ELAPSED: &str = "datanode.handle_region_request_elapsed";
/// Region request type label.
pub const REGION_REQUEST_TYPE: &str = "datanode.region_request_type";
pub const REGION_REQUEST_TYPE: &str = "datanode_region_request_type";
lazy_static! {
/// The elapsed time of handling a request in the region_server.
pub static ref HANDLE_REGION_REQUEST_ELAPSED: HistogramVec = register_histogram_vec!(
"datanode_handle_region_request_elapsed",
"datanode handle region request elapsed",
&[REGION_REQUEST_TYPE]
)
.unwrap();
}

View File

@@ -28,7 +28,7 @@ use common_query::physical_plan::DfPhysicalPlanAdapter;
use common_query::{DfPhysicalPlan, Output};
use common_recordbatch::SendableRecordBatchStream;
use common_runtime::Runtime;
use common_telemetry::{info, timer, warn};
use common_telemetry::{info, warn};
use dashmap::DashMap;
use datafusion::catalog::schema::SchemaProvider;
use datafusion::catalog::{CatalogList, CatalogProvider};
@@ -47,7 +47,7 @@ use servers::grpc::region_server::RegionServerHandler;
use session::context::{QueryContextBuilder, QueryContextRef};
use snafu::{OptionExt, ResultExt};
use store_api::metadata::RegionMetadataRef;
use store_api::region_engine::RegionEngineRef;
use store_api::region_engine::{RegionEngineRef, RegionRole};
use store_api::region_request::{RegionCloseRequest, RegionRequest};
use store_api::storage::{RegionId, ScanRequest};
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
@@ -66,6 +66,12 @@ pub struct RegionServer {
inner: Arc<RegionServerInner>,
}
pub struct RegionStat {
pub region_id: RegionId,
pub engine: String,
pub role: RegionRole,
}
impl RegionServer {
pub fn new(
query_engine: QueryEngineRef,
@@ -97,11 +103,19 @@ impl RegionServer {
self.inner.handle_read(request).await
}
pub fn opened_regions(&self) -> Vec<(RegionId, String)> {
pub fn opened_regions(&self) -> Vec<RegionStat> {
self.inner
.region_map
.iter()
.map(|e| (*e.key(), e.value().name().to_string()))
.filter_map(|e| {
let region_id = *e.key();
// Filters out any regions whose role equals None.
e.role(region_id).map(|role| RegionStat {
region_id,
engine: e.value().name().to_string(),
role,
})
})
.collect()
}
@@ -120,6 +134,13 @@ impl RegionServer {
self.inner.runtime.clone()
}
pub async fn region_disk_usage(&self, region_id: RegionId) -> Option<i64> {
match self.inner.region_map.get(&region_id) {
Some(e) => e.region_disk_usage(region_id).await,
None => None,
}
}
/// Stop the region server.
pub async fn stop(&self) -> Result<()> {
self.inner.stop().await
@@ -228,10 +249,9 @@ impl RegionServerInner {
request: RegionRequest,
) -> Result<Output> {
let request_type = request.request_type();
let _timer = timer!(
crate::metrics::HANDLE_REGION_REQUEST_ELAPSED,
&[(crate::metrics::REGION_REQUEST_TYPE, request_type),]
);
let _timer = crate::metrics::HANDLE_REGION_REQUEST_ELAPSED
.with_label_values(&[request_type])
.start_timer();
let region_change = match &request {
RegionRequest::Create(create) => RegionChange::Register(create.engine.clone()),

View File

@@ -26,7 +26,7 @@ use std::{env, path};
use common_base::readable_size::ReadableSize;
use common_telemetry::logging::info;
use object_store::layers::{LoggingLayer, LruCacheLayer, MetricsLayer, RetryLayer, TracingLayer};
use object_store::layers::{LoggingLayer, LruCacheLayer, RetryLayer, TracingLayer};
use object_store::services::Fs as FsBuilder;
use object_store::util::normalize_dir;
use object_store::{util, HttpClient, ObjectStore, ObjectStoreBuilder};
@@ -58,8 +58,7 @@ pub(crate) async fn new_object_store(opts: &DatanodeOptions) -> Result<ObjectSto
object_store
};
Ok(object_store
.layer(MetricsLayer)
let store = object_store
.layer(
LoggingLayer::default()
// Print the expected error only in DEBUG level.
@@ -67,7 +66,23 @@ pub(crate) async fn new_object_store(opts: &DatanodeOptions) -> Result<ObjectSto
.with_error_level(Some("debug"))
.expect("input error level must be valid"),
)
.layer(TracingLayer))
.layer(TracingLayer);
// In the test environment, multiple datanodes will be started in the same process.
// If each datanode registered its Prometheus metrics at startup, the program would crash because the same metrics would be registered repeatedly.
// So the Prometheus metric layer is disabled in the test environment.
#[cfg(feature = "testing")]
return Ok(store);
#[cfg(not(feature = "testing"))]
{
let registry = prometheus::default_registry();
Ok(
store.layer(object_store::layers::PrometheusLayer::with_registry(
registry.clone(),
)),
)
}
}
async fn create_object_store_with_cache(

View File

@@ -37,7 +37,7 @@ use query::query_engine::DescribeResult;
use query::QueryEngine;
use session::context::QueryContextRef;
use store_api::metadata::RegionMetadataRef;
use store_api::region_engine::RegionEngine;
use store_api::region_engine::{RegionEngine, RegionRole};
use store_api::region_request::RegionRequest;
use store_api::storage::{RegionId, ScanRequest};
use table::TableRef;
@@ -179,6 +179,10 @@ impl RegionEngine for MockRegionEngine {
unimplemented!()
}
async fn region_disk_usage(&self, _region_id: RegionId) -> Option<i64> {
unimplemented!()
}
async fn stop(&self) -> Result<(), BoxedError> {
Ok(())
}
@@ -186,4 +190,8 @@ impl RegionEngine for MockRegionEngine {
fn set_writable(&self, _region_id: RegionId, _writable: bool) -> Result<(), BoxedError> {
Ok(())
}
fn role(&self, _region_id: RegionId) -> Option<RegionRole> {
Some(RegionRole::Leader)
}
}

View File

@@ -24,7 +24,9 @@ use crate::value::Value;
use crate::vectors::operations::VectorOp;
use crate::vectors::{TimestampMillisecondVector, VectorRef};
const CURRENT_TIMESTAMP: &str = "current_timestamp()";
const CURRENT_TIMESTAMP: &str = "current_timestamp";
const CURRENT_TIMESTAMP_FN: &str = "current_timestamp()";
const NOW_FN: &str = "now()";
/// Column's default constraint.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
@@ -77,7 +79,7 @@ impl ColumnDefaultConstraint {
match self {
ColumnDefaultConstraint::Function(expr) => {
ensure!(
expr == CURRENT_TIMESTAMP,
expr == CURRENT_TIMESTAMP || expr == CURRENT_TIMESTAMP_FN || expr == NOW_FN,
error::UnsupportedDefaultExprSnafu { expr }
);
ensure!(
@@ -130,7 +132,9 @@ impl ColumnDefaultConstraint {
match &expr[..] {
// TODO(dennis): we only supports current_timestamp right now,
// it's better to use a expression framework in future.
CURRENT_TIMESTAMP => create_current_timestamp_vector(data_type, num_rows),
CURRENT_TIMESTAMP | CURRENT_TIMESTAMP_FN | NOW_FN => {
create_current_timestamp_vector(data_type, num_rows)
}
_ => error::UnsupportedDefaultExprSnafu { expr }.fail(),
}
}
@@ -160,7 +164,9 @@ impl ColumnDefaultConstraint {
// Functions should also ensure its return value is not null when
// is_nullable is true.
match &expr[..] {
CURRENT_TIMESTAMP => create_current_timestamp(data_type),
CURRENT_TIMESTAMP | CURRENT_TIMESTAMP_FN | NOW_FN => {
create_current_timestamp(data_type)
}
_ => error::UnsupportedDefaultExprSnafu { expr }.fail(),
}
}

View File

@@ -203,6 +203,22 @@ impl Value {
}
}
/// Cast Value to Date. Return None if value is not a valid date data type.
pub fn as_date(&self) -> Option<Date> {
match self {
Value::Date(t) => Some(*t),
_ => None,
}
}
/// Cast Value to DateTime. Return None if value is not a valid datetime data type.
pub fn as_datetime(&self) -> Option<DateTime> {
match self {
Value::DateTime(t) => Some(*t),
_ => None,
}
}
/// Cast Value to [Time]. Return None if value is not a valid time data type.
pub fn as_time(&self) -> Option<Time> {
match self {
@@ -787,10 +803,11 @@ impl TryFrom<ScalarValue> for Value {
.map(|x| Value::Duration(Duration::new(x, TimeUnit::Nanosecond)))
.unwrap_or(Value::Null),
ScalarValue::Decimal128(_, _, _)
| ScalarValue::Decimal256(_, _, _)
| ScalarValue::Struct(_, _)
| ScalarValue::Dictionary(_, _) => {
return error::UnsupportedArrowTypeSnafu {
arrow_type: v.get_datatype(),
arrow_type: v.data_type(),
}
.fail()
}

View File

@@ -100,7 +100,8 @@ impl ConstantVector {
let arr = indices.to_arrow_array();
let indices_arr = arr.as_any().downcast_ref::<UInt32Array>().unwrap();
if !arrow::compute::min_boolean(
&arrow::compute::lt_scalar(indices_arr, len as u32).unwrap(),
&arrow::compute::kernels::cmp::lt(indices_arr, &UInt32Array::new_scalar(len as u32))
.unwrap(),
)
.unwrap()
{

View File

@@ -219,6 +219,7 @@ impl Helper {
ConstantVector::new(Arc::new(IntervalMonthDayNanoVector::from(vec![v])), length)
}
ScalarValue::Decimal128(_, _, _)
| ScalarValue::Decimal256(_, _, _)
| ScalarValue::DurationSecond(_)
| ScalarValue::DurationMillisecond(_)
| ScalarValue::DurationMicrosecond(_)
@@ -345,7 +346,8 @@ impl Helper {
pub fn like_utf8(names: Vec<String>, s: &str) -> Result<VectorRef> {
let array = StringArray::from(names);
let filter = comparison::like_utf8_scalar(&array, s).context(error::ArrowComputeSnafu)?;
let s = StringArray::new_scalar(s);
let filter = comparison::like(&array, &s).context(error::ArrowComputeSnafu)?;
let result = compute::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
Helper::try_into_vector(result)
@@ -353,7 +355,8 @@ impl Helper {
pub fn like_utf8_filter(names: Vec<String>, s: &str) -> Result<(VectorRef, BooleanVector)> {
let array = StringArray::from(names);
let filter = comparison::like_utf8_scalar(&array, s).context(error::ArrowComputeSnafu)?;
let s = StringArray::new_scalar(s);
let filter = comparison::like(&array, &s).context(error::ArrowComputeSnafu)?;
let result = compute::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
let vector = Helper::try_into_vector(result)?;
@@ -481,7 +484,8 @@ mod tests {
fn assert_filter(array: Vec<String>, s: &str, expected_filter: &BooleanVector) {
let array = StringArray::from(array);
let actual_filter = comparison::like_utf8_scalar(&array, s).unwrap();
let s = StringArray::new_scalar(s);
let actual_filter = comparison::like(&array, &s).unwrap();
assert_eq!(BooleanVector::from(actual_filter), *expected_filter);
}
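A hedged sketch of the Datum-based kernels that replace the `*_scalar` comparison helpers in this arrow upgrade; the function names are illustrative and the module paths may vary slightly between arrow releases:

use arrow::array::{BooleanArray, StringArray, UInt32Array};
use arrow::compute::kernels::{cmp, comparison};

fn like_pattern(names: Vec<String>, pattern: &str) -> arrow::error::Result<BooleanArray> {
    let array = StringArray::from(names);
    // The scalar operand is wrapped in a Scalar datum instead of being passed as &str,
    // which is what `like_utf8_scalar` used to accept.
    let pattern = StringArray::new_scalar(pattern);
    comparison::like(&array, &pattern)
}

fn all_less_than(indices: &UInt32Array, len: u32) -> arrow::error::Result<Option<bool>> {
    let mask = cmp::lt(indices, &UInt32Array::new_scalar(len))?;
    Ok(arrow::compute::min_boolean(&mask))
}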

View File

@@ -16,7 +16,7 @@ use std::any::Any;
use std::fmt;
use std::sync::Arc;
use arrow::array::{Array, ArrayData, ArrayRef, NullArray};
use arrow::array::{Array, ArrayRef, NullArray};
use snafu::{ensure, OptionExt};
use crate::data_type::ConcreteDataType;
@@ -44,10 +44,6 @@ impl NullVector {
pub(crate) fn as_arrow(&self) -> &dyn Array {
&self.array
}
fn to_array_data(&self) -> ArrayData {
self.array.to_data()
}
}
impl From<NullArray> for NullVector {
@@ -74,14 +70,11 @@ impl Vector for NullVector {
}
fn to_arrow_array(&self) -> ArrayRef {
// TODO(yingwen): Replaced by clone after upgrading to arrow 28.0.
let data = self.to_array_data();
Arc::new(NullArray::from(data))
Arc::new(self.array.clone())
}
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
let data = self.to_array_data();
Box::new(NullArray::from(data))
Box::new(self.array.clone())
}
fn validity(&self) -> Validity {
@@ -93,7 +86,7 @@ impl Vector for NullVector {
}
fn null_count(&self) -> usize {
self.array.null_count()
self.array.len()
}
fn is_null(&self, _row: usize) -> bool {
@@ -225,12 +218,11 @@ mod tests {
assert_eq!(v.len(), 32);
assert_eq!(0, v.memory_size());
let arrow_arr = v.to_arrow_array();
assert_eq!(arrow_arr.null_count(), 32);
assert_eq!(v.null_count(), 32);
let array2 = arrow_arr.slice(8, 16);
assert_eq!(array2.len(), 16);
assert_eq!(array2.null_count(), 16);
let vector2 = v.slice(8, 16);
assert_eq!(vector2.len(), 16);
assert_eq!(vector2.null_count(), 16);
assert_eq!("NullVector", v.vector_type_name());
assert!(!v.is_const());
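The NullVector change tracks an arrow-rs behaviour change: `NullArray` no longer carries a validity bitmap, so its physical `null_count()` is expected to be 0 and the vector reports its logical null count as its length instead. A minimal sketch under that assumption:

use arrow::array::{Array, NullArray};

fn logical_null_count(array: &NullArray) -> usize {
    // Physically there is no null buffer to count; logically every slot of a NullArray
    // is null, so the length is the meaningful answer.
    array.len()
}

fn example() {
    let array = NullArray::new(32);
    assert_eq!(logical_null_count(&array), 32);
}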

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::{Arc, RwLock};
use async_trait::async_trait;
use common_catalog::consts::FILE_ENGINE;
@@ -24,12 +24,12 @@ use common_telemetry::{error, info};
use object_store::ObjectStore;
use snafu::{ensure, OptionExt};
use store_api::metadata::RegionMetadataRef;
use store_api::region_engine::RegionEngine;
use store_api::region_engine::{RegionEngine, RegionRole};
use store_api::region_request::{
RegionCloseRequest, RegionCreateRequest, RegionDropRequest, RegionOpenRequest, RegionRequest,
};
use store_api::storage::{RegionId, ScanRequest};
use tokio::sync::{Mutex, RwLock};
use tokio::sync::Mutex;
use crate::config::EngineConfig;
use crate::error::{
@@ -93,11 +93,19 @@ impl RegionEngine for FileRegionEngine {
self.inner.stop().await.map_err(BoxedError::new)
}
async fn region_disk_usage(&self, _: RegionId) -> Option<i64> {
None
}
fn set_writable(&self, region_id: RegionId, writable: bool) -> Result<(), BoxedError> {
self.inner
.set_writable(region_id, writable)
.map_err(BoxedError::new)
}
fn role(&self, region_id: RegionId) -> Option<RegionRole> {
self.inner.state(region_id)
}
}
struct EngineInner {
@@ -143,7 +151,7 @@ impl EngineInner {
async fn stop(&self) -> EngineResult<()> {
let _lock = self.region_mutex.lock().await;
self.regions.write().await.clear();
self.regions.write().unwrap().clear();
Ok(())
}
@@ -151,6 +159,14 @@ impl EngineInner {
// TODO(zhongzc): Improve the semantics and implementation of this API.
Ok(())
}
fn state(&self, region_id: RegionId) -> Option<RegionRole> {
if self.regions.read().unwrap().get(&region_id).is_some() {
Some(RegionRole::Leader)
} else {
None
}
}
}
impl EngineInner {
@@ -185,7 +201,7 @@ impl EngineInner {
region_id, err
);
})?;
self.regions.write().await.insert(region_id, region);
self.regions.write().unwrap().insert(region_id, region);
info!("A new region is created, region_id: {}", region_id);
Ok(Output::AffectedRows(0))
@@ -215,7 +231,7 @@ impl EngineInner {
region_id, err
);
})?;
self.regions.write().await.insert(region_id, region);
self.regions.write().unwrap().insert(region_id, region);
info!("Region opened, region_id: {}", region_id);
Ok(Output::AffectedRows(0))
@@ -228,7 +244,7 @@ impl EngineInner {
) -> EngineResult<Output> {
let _lock = self.region_mutex.lock().await;
let mut regions = self.regions.write().await;
let mut regions = self.regions.write().unwrap();
if regions.remove(&region_id).is_some() {
info!("Region closed, region_id: {}", region_id);
}
@@ -259,17 +275,17 @@ impl EngineInner {
);
})?;
}
let _ = self.regions.write().await.remove(&region_id);
let _ = self.regions.write().unwrap().remove(&region_id);
info!("Region dropped, region_id: {}", region_id);
Ok(Output::AffectedRows(0))
}
async fn get_region(&self, region_id: RegionId) -> Option<FileRegionRef> {
self.regions.read().await.get(&region_id).cloned()
self.regions.read().unwrap().get(&region_id).cloned()
}
async fn exists(&self, region_id: RegionId) -> bool {
self.regions.read().await.contains_key(&region_id)
self.regions.read().unwrap().contains_key(&region_id)
}
}
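The file engine's switch from `tokio::sync::RwLock` to `std::sync::RwLock` works because the region map is only touched in short critical sections and the guard is never held across an await point. A minimal sketch of the pattern with hypothetical stand-in types:

use std::collections::HashMap;
use std::sync::{Arc, RwLock};

// Hypothetical stand-ins for RegionId / FileRegionRef.
type RegionId = u64;
type RegionRef = Arc<String>;

#[derive(Default)]
struct Regions {
    inner: RwLock<HashMap<RegionId, RegionRef>>,
}

impl Regions {
    fn insert(&self, id: RegionId, region: RegionRef) {
        // The guard is dropped at the end of the statement, before any .await,
        // so the cheaper synchronous lock is sufficient here.
        self.inner.write().unwrap().insert(id, region);
    }

    fn get(&self, id: RegionId) -> Option<RegionRef> {
        self.inner.read().unwrap().get(&id).cloned()
    }
}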

View File

@@ -46,19 +46,19 @@ futures = "0.3"
futures-util.workspace = true
humantime-serde.workspace = true
itertools.workspace = true
lazy_static.workspace = true
log-store = { workspace = true }
meta-client = { workspace = true }
raft-engine = { workspace = true }
# Although it is not used, please do not delete it.
metrics.workspace = true
moka = { workspace = true, features = ["future"] }
object-store = { workspace = true }
openmetrics-parser = "0.4"
opentelemetry-proto.workspace = true
operator.workspace = true
partition = { workspace = true }
prometheus.workspace = true
prost.workspace = true
query = { workspace = true }
raft-engine = { workspace = true }
regex.workspace = true
script = { workspace = true, features = ["python"], optional = true }
serde.workspace = true

View File

@@ -272,6 +272,12 @@ pub enum Error {
#[snafu(display("Invalid auth config"))]
IllegalAuthConfig { source: auth::error::Error },
#[snafu(display("Failed to serialize options to TOML"))]
TomlFormat {
#[snafu(source)]
error: toml::ser::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -279,7 +285,8 @@ pub type Result<T> = std::result::Result<T, Error>;
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::ParseAddr { .. }
Error::TomlFormat { .. }
| Error::ParseAddr { .. }
| Error::InvalidSql { .. }
| Error::InvalidInsertRequest { .. }
| Error::InvalidDeleteRequest { .. }

View File

@@ -18,7 +18,9 @@ use serde::{Deserialize, Serialize};
use servers::heartbeat_options::HeartbeatOptions;
use servers::http::HttpOptions;
use servers::Mode;
use snafu::prelude::*;
use crate::error::{Result, TomlFormatSnafu};
use crate::service_config::{
DatanodeOptions, GrpcOptions, InfluxdbOptions, MysqlOptions, OpentsdbOptions, OtlpOptions,
PostgresOptions, PromStoreOptions,
@@ -76,6 +78,16 @@ impl FrontendOptions {
}
}
pub trait TomlSerializable {
fn to_toml(&self) -> Result<String>;
}
impl TomlSerializable for FrontendOptions {
fn to_toml(&self) -> Result<String> {
toml::to_string(&self).context(TomlFormatSnafu)
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -30,7 +30,7 @@ use catalog::kvbackend::{CachedMetaKvBackend, KvBackendCatalogManager};
use catalog::CatalogManagerRef;
use client::client_manager::DatanodeClients;
use common_base::Plugins;
use common_config::KvStoreConfig;
use common_config::KvBackendConfig;
use common_error::ext::BoxedError;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::cache_invalidator::DummyCacheInvalidator;
@@ -44,8 +44,8 @@ use common_procedure::local::{LocalManager, ManagerConfig};
use common_procedure::options::ProcedureConfig;
use common_procedure::ProcedureManagerRef;
use common_query::Output;
use common_telemetry::error;
use common_telemetry::logging::info;
use common_telemetry::{error, timer};
use datanode::region_server::RegionServer;
use log_store::raft_engine::RaftEngineBackend;
use meta_client::client::{MetaClient, MetaClientBuilder};
@@ -88,7 +88,7 @@ use crate::error::{
ParseSqlSnafu, PermissionSnafu, PlanStatementSnafu, Result, SqlExecInterceptedSnafu,
TableOperationSnafu,
};
use crate::frontend::FrontendOptions;
use crate::frontend::{FrontendOptions, TomlSerializable};
use crate::heartbeat::handler::invalidate_table_cache::InvalidateTableCacheHandler;
use crate::heartbeat::HeartbeatTask;
use crate::metrics;
@@ -257,23 +257,23 @@ impl Instance {
pub async fn try_build_standalone_components(
dir: String,
kv_store_config: KvStoreConfig,
kv_backend_config: KvBackendConfig,
procedure_config: ProcedureConfig,
) -> Result<(KvBackendRef, ProcedureManagerRef)> {
let kv_store = Arc::new(
let kv_backend = Arc::new(
RaftEngineBackend::try_open_with_cfg(Config {
dir,
purge_threshold: ReadableSize(kv_store_config.purge_threshold.0),
purge_threshold: ReadableSize(kv_backend_config.purge_threshold.0),
recovery_mode: RecoveryMode::TolerateTailCorruption,
batch_compression_threshold: ReadableSize::kb(8),
target_file_size: ReadableSize(kv_store_config.file_size.0),
target_file_size: ReadableSize(kv_backend_config.file_size.0),
..Default::default()
})
.map_err(BoxedError::new)
.context(error::OpenRaftEngineBackendSnafu)?,
);
let state_store = Arc::new(KvStateStore::new(kv_store.clone()));
let state_store = Arc::new(KvStateStore::new(kv_backend.clone()));
let manager_config = ManagerConfig {
max_retry_times: procedure_config.max_retry_times,
@@ -282,7 +282,7 @@ impl Instance {
};
let procedure_manager = Arc::new(LocalManager::new(manager_config, state_store));
Ok((kv_store, procedure_manager))
Ok((kv_backend, procedure_manager))
}
pub async fn try_new_standalone(
@@ -358,7 +358,10 @@ impl Instance {
})
}
pub async fn build_servers(&mut self, opts: &FrontendOptions) -> Result<()> {
pub async fn build_servers(
&mut self,
opts: impl Into<FrontendOptions> + TomlSerializable,
) -> Result<()> {
let servers = Services::build(opts, Arc::new(self.clone()), self.plugins.clone()).await?;
self.servers = Arc::new(servers);
@@ -422,7 +425,7 @@ impl SqlQueryHandler for Instance {
type Error = Error;
async fn do_query(&self, query: &str, query_ctx: QueryContextRef) -> Vec<Result<Output>> {
let _timer = timer!(metrics::METRIC_HANDLE_SQL_ELAPSED);
let _timer = metrics::METRIC_HANDLE_SQL_ELAPSED.start_timer();
let query_interceptor_opt = self.plugins.get::<SqlQueryInterceptorRef<Error>>();
let query_interceptor = query_interceptor_opt.as_ref();
let query = match query_interceptor.pre_parsing(query, query_ctx.clone()) {
@@ -482,7 +485,7 @@ impl SqlQueryHandler for Instance {
}
async fn do_exec_plan(&self, plan: LogicalPlan, query_ctx: QueryContextRef) -> Result<Output> {
let _timer = timer!(metrics::METRIC_EXEC_PLAN_ELAPSED);
let _timer = metrics::METRIC_EXEC_PLAN_ELAPSED.start_timer();
// plan should be prepared before exec
// we'll do check there
self.query_engine
@@ -551,7 +554,7 @@ impl PrometheusHandler for Instance {
query: &PromQuery,
query_ctx: QueryContextRef,
) -> server_error::Result<Output> {
let _timer = timer!(metrics::METRIC_HANDLE_PROMQL_ELAPSED);
let _timer = metrics::METRIC_HANDLE_PROMQL_ELAPSED.start_timer();
let interceptor = self
.plugins
.get::<PromQueryInterceptorRef<server_error::Error>>();

View File

@@ -15,7 +15,6 @@
use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
use common_error::ext::BoxedError;
use metrics::counter;
use opentelemetry_proto::tonic::collector::metrics::v1::{
ExportMetricsServiceRequest, ExportMetricsServiceResponse,
};
@@ -51,7 +50,7 @@ impl OpenTelemetryProtocolHandler for Instance {
.map_err(BoxedError::new)
.context(error::ExecuteGrpcQuerySnafu)?;
counter!(OTLP_METRICS_ROWS, rows as u64);
OTLP_METRICS_ROWS.inc_by(rows as u64);
let resp = ExportMetricsServiceResponse {
// TODO(sunng87): add support for partial_success in future patch
@@ -87,7 +86,7 @@ impl OpenTelemetryProtocolHandler for Instance {
.map_err(BoxedError::new)
.context(error::ExecuteGrpcQuerySnafu)?;
counter!(OTLP_TRACES_ROWS, rows as u64);
OTLP_TRACES_ROWS.inc_by(rows as u64);
let resp = ExportTraceServiceResponse {
// TODO(fys): add support for partial_success in future patch
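The removed `counter!` calls become explicit increments on counters registered with the prometheus crate; a sketch with a hypothetical counter standing in for OTLP_METRICS_ROWS / OTLP_TRACES_ROWS:

use lazy_static::lazy_static;
use prometheus::{register_int_counter, IntCounter};

lazy_static! {
    // Hypothetical counter name for illustration.
    static ref INGESTED_ROWS: IntCounter =
        register_int_counter!("example_ingested_rows", "rows ingested via OTLP").unwrap();
}

fn record_rows(rows: usize) {
    // metrics-rs `counter!(NAME, rows as u64)` becomes an explicit inc_by on the
    // registered prometheus counter.
    INGESTED_ROWS.inc_by(rows as u64);
}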

View File

@@ -21,7 +21,6 @@ use common_error::ext::BoxedError;
use common_query::Output;
use common_recordbatch::RecordBatches;
use common_telemetry::logging;
use metrics::counter;
use prost::Message;
use servers::error::{self, AuthSnafu, Result as ServerResult};
use servers::prom_store::{self, Metrics};
@@ -64,7 +63,7 @@ fn negotiate_response_type(accepted_response_types: &[i32]) -> ServerResult<Resp
})?;
// It's safe to unwrap here; we know that it should be SAMPLES_RESPONSE_TYPE
Ok(ResponseType::from_i32(*response_type).unwrap())
Ok(ResponseType::try_from(*response_type).unwrap())
}
async fn to_query_result(table_name: &str, output: Output) -> ServerResult<QueryResult> {
@@ -161,7 +160,7 @@ impl PromStoreProtocolHandler for Instance {
.map_err(BoxedError::new)
.context(error::ExecuteGrpcQuerySnafu)?;
counter!(PROM_STORE_REMOTE_WRITE_SAMPLES, samples as u64);
PROM_STORE_REMOTE_WRITE_SAMPLES.inc_by(samples as u64);
Ok(())
}
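prost 0.12 deprecates the generated `from_i32` helpers in favour of the standard `TryFrom<i32>` impl, which is what the `ResponseType::try_from` call above relies on. A sketch with a hypothetical hand-written enum standing in for the generated type:

// Hypothetical enum standing in for the prost-generated remote-read ResponseType.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ResponseType {
    Samples,
    StreamedXorChunks,
}

impl TryFrom<i32> for ResponseType {
    type Error = i32;

    fn try_from(value: i32) -> Result<Self, Self::Error> {
        match value {
            0 => Ok(ResponseType::Samples),
            1 => Ok(ResponseType::StreamedXorChunks),
            other => Err(other),
        }
    }
}

fn negotiate(accepted: &[i32]) -> Option<ResponseType> {
    // Pick the first accepted value that maps onto a known variant.
    accepted.iter().find_map(|v| ResponseType::try_from(*v).ok())
}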

View File

@@ -16,7 +16,6 @@ use std::collections::HashMap;
use async_trait::async_trait;
use common_query::Output;
use common_telemetry::timer;
use servers::query_handler::ScriptHandler;
use session::context::QueryContextRef;
@@ -31,7 +30,7 @@ impl ScriptHandler for Instance {
name: &str,
script: &str,
) -> servers::error::Result<()> {
let _timer = timer!(metrics::METRIC_HANDLE_SCRIPTS_ELAPSED);
let _timer = metrics::METRIC_HANDLE_SCRIPTS_ELAPSED.start_timer();
self.script_executor
.insert_script(query_ctx, name, script)
.await
@@ -43,7 +42,7 @@ impl ScriptHandler for Instance {
name: &str,
params: HashMap<String, String>,
) -> servers::error::Result<Output> {
let _timer = timer!(metrics::METRIC_RUN_SCRIPT_ELAPSED);
let _timer = metrics::METRIC_RUN_SCRIPT_ELAPSED.start_timer();
self.script_executor
.execute_script(query_ctx, name, params)
.await

Some files were not shown because too many files have changed in this diff.