Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2026-01-04 20:32:56 +00:00)

Compare commits: v0.18.0-ni ... v1.0.0-bet (64 commits)
SHA1: 2bbc4bc4bc, b1525e566b, df954b47d5, acfd674332, e7928aaeee, d5f52013ec, c1e762960a, 7cc0439cc9,
6eb7efcb76, 5d0e94bfa8, e842d401fb, 8153068b89, bb6a3a2ff3, 49c6812e98, 24671b60b4, c7fded29ee,
afa8684ebd, 47937961f6, 182cce4cc2, ac0e95c193, f567dcef86, 30192d9802, 62d109c1f4, 910a383420,
af6bbacc8c, 7616ffcb35, a3dbd029c5, 9caeae391e, 35951afff9, a049b68c26, c2ff563ac6, 82812ff19e,
4a77167138, 934df46f53, fb92e4d0b2, 0939dc1d32, 50c9600ef8, abcfbd7f41, aac3ede261, 3001c2d719,
6caff50d01, 421f4eec05, d944e5c6b8, 013d61acbb, b7e834ab92, 5eab9a1be3, 9de680f456, 5deaaa59ec,
61724386ef, 6960a0183a, 30894d7599, acf38a7091, 109b70750a, ee5b7ff3c8, 5d0ef376de, 11c0381fc1,
e8b7b0ad16, 6efffa427d, 6576e3555d, f0afd675e3, 37bc2e6b07, a9d1d33138, 22d9eb6930, da976e534d
.github/scripts/deploy-greptimedb.sh (vendored): 6 changed lines

@@ -7,6 +7,8 @@ KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.32.0}"
ENABLE_STANDALONE_MODE="${ENABLE_STANDALONE_MODE:-true}"
DEFAULT_INSTALL_NAMESPACE=${DEFAULT_INSTALL_NAMESPACE:-default}
GREPTIMEDB_IMAGE_TAG=${GREPTIMEDB_IMAGE_TAG:-latest}
GREPTIMEDB_OPERATOR_IMAGE_TAG=${GREPTIMEDB_OPERATOR_IMAGE_TAG:-v0.5.1}
GREPTIMEDB_INITIALIZER_IMAGE_TAG="${GREPTIMEDB_OPERATOR_IMAGE_TAG}"
GREPTIME_CHART="https://greptimeteam.github.io/helm-charts/"
ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
ETCD_CHART_VERSION="${ETCD_CHART_VERSION:-12.0.8}"

@@ -58,7 +60,7 @@ function deploy_greptimedb_operator() {
  # Use the latest chart and image.
  helm upgrade --install greptimedb-operator greptime/greptimedb-operator \
    --create-namespace \
    --set image.tag=latest \
    --set image.tag="$GREPTIMEDB_OPERATOR_IMAGE_TAG" \
    -n "$DEFAULT_INSTALL_NAMESPACE"

  # Wait for greptimedb-operator to be ready.

@@ -78,6 +80,7 @@ function deploy_greptimedb_cluster() {
  helm upgrade --install "$cluster_name" greptime/greptimedb-cluster \
    --create-namespace \
    --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
    --set initializer.tag="$GREPTIMEDB_INITIALIZER_IMAGE_TAG" \
    --set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
    --set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
    -n "$install_namespace"

@@ -115,6 +118,7 @@ function deploy_greptimedb_cluster_with_s3_storage() {
  helm upgrade --install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
    --create-namespace \
    --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
    --set initializer.tag="$GREPTIMEDB_INITIALIZER_IMAGE_TAG" \
    --set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
    --set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
    --set objectStorage.s3.bucket="$AWS_CI_TEST_BUCKET" \
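The hunks above replace the hard-coded `latest` operator image with environment variables and reuse `GREPTIMEDB_OPERATOR_IMAGE_TAG` for the initializer image. A minimal sketch of overriding those variables when invoking the script; the export and invocation lines are illustrative, while the variable names and default values come from the diff:

```shell
# Pin the operator (and therefore the initializer) image instead of `latest`,
# then run the CI deployment script against the current kubectl context.
export GREPTIMEDB_IMAGE_TAG=latest
# GREPTIMEDB_OPERATOR_IMAGE_TAG also becomes GREPTIMEDB_INITIALIZER_IMAGE_TAG.
export GREPTIMEDB_OPERATOR_IMAGE_TAG=v0.5.1
export ETCD_CHART_VERSION=12.0.8
bash .github/scripts/deploy-greptimedb.sh
```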
@@ -39,8 +39,11 @@ update_helm_charts_version() {
    --body "This PR updates the GreptimeDB version." \
    --base main \
    --head $BRANCH_NAME \
    --reviewer zyy17 \
    --reviewer daviderli614
    --reviewer sunng87 \
    --reviewer daviderli614 \
    --reviewer killme2008 \
    --reviewer evenyag \
    --reviewer fengjiachun
}

update_helm_charts_version
@@ -35,8 +35,11 @@ update_homebrew_greptime_version() {
    --body "This PR updates the GreptimeDB version." \
    --base main \
    --head $BRANCH_NAME \
    --reviewer zyy17 \
    --reviewer daviderli614
    --reviewer sunng87 \
    --reviewer daviderli614 \
    --reviewer killme2008 \
    --reviewer evenyag \
    --reviewer fengjiachun
}

update_homebrew_greptime_version
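Both hunks expand the reviewer list on the automated version-bump pull requests. The flags shown (`--body`, `--base`, `--head`, `--reviewer`) belong to a GitHub CLI PR-creation call; a hedged sketch, assuming the scripts use `gh pr create`, with an illustrative `--title`:

```shell
# Open the version-bump PR and request the expanded reviewer set.
gh pr create \
  --title "chore: bump GreptimeDB version" \
  --body "This PR updates the GreptimeDB version." \
  --base main \
  --head "$BRANCH_NAME" \
  --reviewer zyy17 \
  --reviewer sunng87 \
  --reviewer daviderli614 \
  --reviewer killme2008 \
  --reviewer evenyag \
  --reviewer fengjiachun
```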
.github/workflows/develop.yml (vendored): 5 changed lines

@@ -613,6 +613,9 @@ jobs:
- name: "MySQL Kvbackend"
  opts: "--setup-mysql"
  kafka: false
- name: "Flat format"
  opts: "--enable-flat-format"
  kafka: false
timeout-minutes: 60
steps:
  - uses: actions/checkout@v4

@@ -808,7 +811,7 @@ jobs:
- name: Setup external services
  working-directory: tests-integration/fixtures
  run: ../../.github/scripts/pull-test-deps-images.sh && docker compose up -d --wait

- name: Run nextest cases
  run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend -F mysql_kvbackend
  env:
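The new matrix entry adds a "Flat format" run driven by `--enable-flat-format`. A hedged sketch of reproducing it locally, assuming the matrix `opts` string is forwarded to the workspace's `sqlness-runner` binary (the crate exists in the workspace; the exact invocation below is illustrative):

```shell
# Run the sqlness integration suite with the experimental flat format enabled,
# mirroring the new "Flat format" CI matrix entry.
cargo run --bin sqlness-runner -- --enable-flat-format
```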
.github/workflows/docs.yml (vendored): 1 changed line

@@ -92,5 +92,6 @@ jobs:
mode:
  - name: "Basic"
  - name: "Remote WAL"
  - name: "Flat format"
steps:
  - run: 'echo "No action required"'
Cargo.lock (generated): 262 changed lines
@@ -212,8 +212,9 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"
|
||||
|
||||
[[package]]
|
||||
name = "api"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"arrow-schema",
|
||||
"common-base",
|
||||
"common-decimal",
|
||||
"common-error",
|
||||
@@ -732,7 +733,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "auth"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -1382,7 +1383,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cache"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"catalog",
|
||||
"common-error",
|
||||
@@ -1417,7 +1418,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
||||
|
||||
[[package]]
|
||||
name = "catalog"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow",
|
||||
@@ -1629,6 +1630,7 @@ dependencies = [
|
||||
"chrono",
|
||||
"chrono-tz-build",
|
||||
"phf 0.11.3",
|
||||
"uncased",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1639,6 +1641,8 @@ checksum = "8f10f8c9340e31fc120ff885fcdb54a0b48e474bbd77cab557f0c30a3e569402"
|
||||
dependencies = [
|
||||
"parse-zoneinfo",
|
||||
"phf_codegen 0.11.3",
|
||||
"phf_shared 0.11.3",
|
||||
"uncased",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1759,7 +1763,7 @@ checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
|
||||
|
||||
[[package]]
|
||||
name = "cli"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@@ -1812,7 +1816,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "client"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -1844,8 +1848,8 @@ dependencies = [
|
||||
"serde_json",
|
||||
"snafu 0.8.6",
|
||||
"store-api",
|
||||
"substrait 0.18.0",
|
||||
"substrait 0.37.3",
|
||||
"substrait 1.0.0-beta.1",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic 0.13.1",
|
||||
@@ -1885,7 +1889,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cmd"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"auth",
|
||||
@@ -1896,6 +1900,7 @@ dependencies = [
|
||||
"clap 4.5.40",
|
||||
"cli",
|
||||
"client",
|
||||
"colored",
|
||||
"common-base",
|
||||
"common-catalog",
|
||||
"common-config",
|
||||
@@ -1917,6 +1922,7 @@ dependencies = [
|
||||
"common-wal",
|
||||
"datanode",
|
||||
"datatypes",
|
||||
"either",
|
||||
"etcd-client",
|
||||
"file-engine",
|
||||
"flow",
|
||||
@@ -1932,7 +1938,9 @@ dependencies = [
|
||||
"moka",
|
||||
"nu-ansi-term",
|
||||
"object-store",
|
||||
"parquet",
|
||||
"plugins",
|
||||
"pprof",
|
||||
"prometheus",
|
||||
"prost 0.13.5",
|
||||
"query",
|
||||
@@ -1975,6 +1983,16 @@ version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
|
||||
|
||||
[[package]]
|
||||
name = "colored"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "comfy-table"
|
||||
version = "7.1.2"
|
||||
@@ -1994,7 +2012,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
|
||||
|
||||
[[package]]
|
||||
name = "common-base"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"anymap2",
|
||||
"async-trait",
|
||||
@@ -2018,11 +2036,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-catalog"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"const_format",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "common-config"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-error",
|
||||
@@ -2046,7 +2067,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-datasource"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-schema",
|
||||
@@ -2081,7 +2102,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-decimal"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"bigdecimal 0.4.8",
|
||||
"common-error",
|
||||
@@ -2094,7 +2115,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-error"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"common-macro",
|
||||
"http 1.3.1",
|
||||
@@ -2105,7 +2126,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-event-recorder"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -2127,7 +2148,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-frontend"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -2149,7 +2170,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-function"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"api",
|
||||
@@ -2208,7 +2229,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-greptimedb-telemetry"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-runtime",
|
||||
@@ -2225,7 +2246,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-grpc"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -2258,7 +2279,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-grpc-expr"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"common-base",
|
||||
@@ -2278,7 +2299,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-macro"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"greptime-proto",
|
||||
"once_cell",
|
||||
@@ -2289,7 +2310,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-mem-prof"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"common-error",
|
||||
@@ -2305,7 +2326,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-meta"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"anymap2",
|
||||
"api",
|
||||
@@ -2377,7 +2398,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-options"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"common-grpc",
|
||||
"humantime-serde",
|
||||
@@ -2386,11 +2407,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-plugins"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
|
||||
[[package]]
|
||||
name = "common-pprof"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"common-error",
|
||||
"common-macro",
|
||||
@@ -2402,7 +2423,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-procedure"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-stream",
|
||||
@@ -2431,7 +2452,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-procedure-test"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-procedure",
|
||||
@@ -2441,7 +2462,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-query"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -2467,7 +2488,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-recordbatch"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"common-base",
|
||||
@@ -2491,7 +2512,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-runtime"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"clap 4.5.40",
|
||||
@@ -2520,7 +2541,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-session"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"strum 0.27.1",
|
||||
@@ -2528,7 +2549,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-sql"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-decimal",
|
||||
@@ -2546,7 +2567,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-stat"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-runtime",
|
||||
@@ -2561,7 +2582,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-telemetry"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"common-base",
|
||||
@@ -2590,7 +2611,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-test-util"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"client",
|
||||
"common-grpc",
|
||||
@@ -2603,7 +2624,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-time"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"chrono",
|
||||
@@ -2621,7 +2642,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-version"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"build-data",
|
||||
"cargo-manifest",
|
||||
@@ -2632,7 +2653,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-wal"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-error",
|
||||
@@ -2655,7 +2676,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-workload"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"common-telemetry",
|
||||
"serde",
|
||||
@@ -3717,9 +3738,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-pg-catalog"
|
||||
version = "0.11.0"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f258caedd1593e7dca3bf53912249de6685fa224bcce897ede1fbb7b040ac6f6"
|
||||
checksum = "15824c98ff2009c23b0398d441499b147f7c5ac0e5ee993e7a473d79040e3626"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"datafusion",
|
||||
@@ -3892,7 +3913,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datanode"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -3956,7 +3977,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datatypes"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4628,7 +4649,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||
|
||||
[[package]]
|
||||
name = "file-engine"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -4760,7 +4781,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
|
||||
|
||||
[[package]]
|
||||
name = "flow"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow",
|
||||
@@ -4829,7 +4850,7 @@ dependencies = [
|
||||
"sql",
|
||||
"store-api",
|
||||
"strum 0.27.1",
|
||||
"substrait 0.18.0",
|
||||
"substrait 1.0.0-beta.1",
|
||||
"table",
|
||||
"tokio",
|
||||
"tonic 0.13.1",
|
||||
@@ -4884,7 +4905,7 @@ checksum = "28dd6caf6059519a65843af8fe2a3ae298b14b80179855aeb4adc2c1934ee619"
|
||||
|
||||
[[package]]
|
||||
name = "frontend"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -6095,7 +6116,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "index"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"asynchronous-codec",
|
||||
@@ -6307,17 +6328,6 @@ dependencies = [
|
||||
"derive_utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "io-uring"
|
||||
version = "0.7.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
|
||||
dependencies = [
|
||||
"bitflags 2.9.1",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ipnet"
|
||||
version = "2.11.0"
|
||||
@@ -7035,7 +7045,7 @@ checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
|
||||
|
||||
[[package]]
|
||||
name = "log-query"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"common-error",
|
||||
@@ -7047,7 +7057,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "log-store"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@@ -7354,7 +7364,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meta-client"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -7382,7 +7392,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meta-srv"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -7480,7 +7490,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "metric-engine"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -7498,6 +7508,7 @@ dependencies = [
|
||||
"common-telemetry",
|
||||
"common-test-util",
|
||||
"common-time",
|
||||
"common-wal",
|
||||
"datafusion",
|
||||
"datatypes",
|
||||
"futures-util",
|
||||
@@ -7574,7 +7585,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mito-codec"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"bytes",
|
||||
@@ -7599,7 +7610,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mito2"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -8337,7 +8348,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "object-store"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
@@ -8622,7 +8633,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "operator"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"api",
|
||||
@@ -8680,7 +8691,7 @@ dependencies = [
|
||||
"sql",
|
||||
"sqlparser",
|
||||
"store-api",
|
||||
"substrait 0.18.0",
|
||||
"substrait 1.0.0-beta.1",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
@@ -8966,7 +8977,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "partition"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -9265,6 +9276,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
|
||||
dependencies = [
|
||||
"siphasher",
|
||||
"uncased",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -9310,7 +9322,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "pipeline"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"api",
|
||||
@@ -9466,7 +9478,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "plugins"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"auth",
|
||||
"clap 4.5.40",
|
||||
@@ -9766,7 +9778,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "promql"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"async-trait",
|
||||
@@ -10049,7 +10061,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "puffin"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"async-compression 0.4.19",
|
||||
"async-trait",
|
||||
@@ -10091,7 +10103,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "query"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"api",
|
||||
@@ -10115,6 +10127,7 @@ dependencies = [
|
||||
"common-query",
|
||||
"common-recordbatch",
|
||||
"common-runtime",
|
||||
"common-stat",
|
||||
"common-telemetry",
|
||||
"common-time",
|
||||
"datafusion",
|
||||
@@ -10157,7 +10170,7 @@ dependencies = [
|
||||
"sql",
|
||||
"sqlparser",
|
||||
"store-api",
|
||||
"substrait 0.18.0",
|
||||
"substrait 1.0.0-beta.1",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
@@ -11493,7 +11506,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "servers"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"api",
|
||||
@@ -11619,7 +11632,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "session"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"api",
|
||||
@@ -11953,7 +11966,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sql"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-buffer",
|
||||
@@ -12013,7 +12026,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlness-runner"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"clap 4.5.40",
|
||||
@@ -12290,7 +12303,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "standalone"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"catalog",
|
||||
@@ -12331,7 +12344,7 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||
|
||||
[[package]]
|
||||
name = "store-api"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -12496,28 +12509,6 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "substrait"
|
||||
version = "0.18.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"common-error",
|
||||
"common-function",
|
||||
"common-macro",
|
||||
"common-telemetry",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
"datafusion-substrait",
|
||||
"datatypes",
|
||||
"promql",
|
||||
"prost 0.13.5",
|
||||
"snafu 0.8.6",
|
||||
"substrait 0.37.3",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "substrait"
|
||||
version = "0.37.3"
|
||||
@@ -12564,6 +12555,28 @@ dependencies = [
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "substrait"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"common-error",
|
||||
"common-function",
|
||||
"common-macro",
|
||||
"common-telemetry",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
"datafusion-substrait",
|
||||
"datatypes",
|
||||
"promql",
|
||||
"prost 0.13.5",
|
||||
"snafu 0.8.6",
|
||||
"substrait 0.37.3",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "subtle"
|
||||
version = "2.6.1"
|
||||
@@ -12667,7 +12680,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "table"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -12936,7 +12949,7 @@ checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
|
||||
|
||||
[[package]]
|
||||
name = "tests-fuzz"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"async-trait",
|
||||
@@ -12980,7 +12993,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tests-integration"
|
||||
version = "0.18.0"
|
||||
version = "1.0.0-beta.1"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -13054,7 +13067,7 @@ dependencies = [
|
||||
"sqlx",
|
||||
"standalone",
|
||||
"store-api",
|
||||
"substrait 0.18.0",
|
||||
"substrait 1.0.0-beta.1",
|
||||
"table",
|
||||
"tempfile",
|
||||
"time",
|
||||
@@ -13256,23 +13269,20 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "1.47.1"
|
||||
version = "1.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038"
|
||||
checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"bytes",
|
||||
"io-uring",
|
||||
"libc",
|
||||
"mio",
|
||||
"parking_lot 0.12.4",
|
||||
"pin-project-lite",
|
||||
"signal-hook-registry",
|
||||
"slab",
|
||||
"socket2 0.6.0",
|
||||
"tokio-macros",
|
||||
"tracing",
|
||||
"windows-sys 0.59.0",
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -13287,9 +13297,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tokio-macros"
|
||||
version = "2.5.0"
|
||||
version = "2.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
|
||||
checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -13967,6 +13977,15 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "uncased"
|
||||
version = "0.9.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697"
|
||||
dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unescaper"
|
||||
version = "0.1.6"
|
||||
@@ -14711,6 +14730,15 @@ dependencies = [
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.61.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
|
||||
dependencies = [
|
||||
"windows-link 0.2.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.48.5"
|
||||
|
||||
Cargo.toml: 14 changed lines

@@ -74,7 +74,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.18.0"
version = "1.0.0-beta.1"
edition = "2024"
license = "Apache-2.0"

@@ -118,9 +118,10 @@ bitflags = "2.4.1"
bytemuck = "1.12"
bytes = { version = "1.7", features = ["serde"] }
chrono = { version = "0.4", features = ["serde"] }
chrono-tz = "0.10.1"
chrono-tz = { version = "0.10.1", features = ["case-insensitive"] }
clap = { version = "4.4", features = ["derive"] }
config = "0.13.0"
const_format = "0.2"
crossbeam-utils = "0.8"
dashmap = "6.1"
datafusion = "50"

@@ -130,7 +131,7 @@ datafusion-functions = "50"
datafusion-functions-aggregate-common = "50"
datafusion-optimizer = "50"
datafusion-orc = "0.5"
datafusion-pg-catalog = "0.11"
datafusion-pg-catalog = "0.12.1"
datafusion-physical-expr = "50"
datafusion-physical-plan = "50"
datafusion-sql = "50"

@@ -218,12 +219,7 @@ similar-asserts = "1.6.0"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.8"
sqlparser = { version = "0.58.0", default-features = false, features = ["std", "visitor", "serde"] }
sqlx = { version = "0.8", features = [
    "runtime-tokio-rustls",
    "mysql",
    "postgres",
    "chrono",
] }
sqlx = { version = "0.8", default-features = false, features = ["any", "macros", "json", "runtime-tokio-rustls"] }
strum = { version = "0.27", features = ["derive"] }
sysinfo = "0.33"
tempfile = "3"
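Because every member crate inherits `workspace.package.version`, the bump to `1.0.0-beta.1` should appear uniformly across the package graph. One hedged way to spot-check it; any JSON-aware tool would do, and the grep pattern is illustrative:

```shell
# Count how many workspace packages now report the new version.
cargo metadata --format-version 1 --no-deps \
  | grep -o '"version":"1.0.0-beta.1"' \
  | wc -l
```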
README.md: 72 changed lines

@@ -12,8 +12,7 @@

<div align="center">
<h3 align="center">
  <a href="https://greptime.com/product/cloud">GreptimeCloud</a> |
  <a href="https://docs.greptime.com/">User Guide</a> |
  <a href="https://docs.greptime.com/user-guide/overview/">User Guide</a> |
  <a href="https://greptimedb.rs/">API Docs</a> |
  <a href="https://github.com/GreptimeTeam/greptimedb/issues/5446">Roadmap 2025</a>
</h4>

@@ -67,17 +66,24 @@

## Introduction

**GreptimeDB** is an open-source, cloud-native database purpose-built for the unified collection and analysis of observability data (metrics, logs, and traces). Whether you’re operating on the edge, in the cloud, or across hybrid environments, GreptimeDB empowers real-time insights at massive scale — all in one system.
**GreptimeDB** is an open-source, cloud-native database that unifies metrics, logs, and traces, enabling real-time observability at any scale — across edge, cloud, and hybrid environments.

## Features

| Feature | Description |
| --------- | ----------- |
| [Unified Observability Data](https://docs.greptime.com/user-guide/concepts/why-greptimedb) | Store metrics, logs, and traces as timestamped, contextual wide events. Query via [SQL](https://docs.greptime.com/user-guide/query-data/sql), [PromQL](https://docs.greptime.com/user-guide/query-data/promql), and [streaming](https://docs.greptime.com/user-guide/flow-computation/overview). |
| [High Performance & Cost Effective](https://docs.greptime.com/user-guide/manage-data/data-index) | Written in Rust, with a distributed query engine, [rich indexing](https://docs.greptime.com/user-guide/manage-data/data-index), and optimized columnar storage, delivering sub-second responses at PB scale. |
| [Cloud-Native Architecture](https://docs.greptime.com/user-guide/concepts/architecture) | Designed for [Kubernetes](https://docs.greptime.com/user-guide/deployments-administration/deploy-on-kubernetes/greptimedb-operator-management), with compute/storage separation, native object storage (AWS S3, Azure Blob, etc.) and seamless cross-cloud access. |
| [Developer-Friendly](https://docs.greptime.com/user-guide/protocols/overview) | Access via SQL/PromQL interfaces, REST API, MySQL/PostgreSQL protocols, and popular ingestion [protocols](https://docs.greptime.com/user-guide/protocols/overview). |
| [Flexible Deployment](https://docs.greptime.com/user-guide/deployments-administration/overview) | Deploy anywhere: edge (including ARM/[Android](https://docs.greptime.com/user-guide/deployments-administration/run-on-android)) or cloud, with unified APIs and efficient data sync. |
| [All-in-One Observability](https://docs.greptime.com/user-guide/concepts/why-greptimedb) | OpenTelemetry-native platform unifying metrics, logs, and traces. Query via [SQL](https://docs.greptime.com/user-guide/query-data/sql), [PromQL](https://docs.greptime.com/user-guide/query-data/promql), and [Flow](https://docs.greptime.com/user-guide/flow-computation/overview). |
| [High Performance](https://docs.greptime.com/user-guide/manage-data/data-index) | Written in Rust with [rich indexing](https://docs.greptime.com/user-guide/manage-data/data-index) (inverted, fulltext, skipping, vector), delivering sub-second responses at PB scale. |
| [Cost Efficiency](https://docs.greptime.com/user-guide/concepts/architecture) | 50x lower operational and storage costs with compute-storage separation and native object storage (S3, Azure Blob, etc.). |
| [Cloud-Native & Scalable](https://docs.greptime.com/user-guide/deployments-administration/deploy-on-kubernetes/greptimedb-operator-management) | Purpose-built for [Kubernetes](https://docs.greptime.com/user-guide/deployments-administration/deploy-on-kubernetes/greptimedb-operator-management) with unlimited cross-cloud scaling, handling hundreds of thousands of concurrent requests. |
| [Developer-Friendly](https://docs.greptime.com/user-guide/protocols/overview) | SQL/PromQL interfaces, built-in web dashboard, REST API, MySQL/PostgreSQL protocol compatibility, and native [OpenTelemetry](https://docs.greptime.com/user-guide/ingest-data/for-observability/opentelemetry/) support. |
| [Flexible Deployment](https://docs.greptime.com/user-guide/deployments-administration/overview) | Deploy anywhere from ARM-based edge devices (including [Android](https://docs.greptime.com/user-guide/deployments-administration/run-on-android)) to cloud, with unified APIs and efficient data sync. |

✅ **Perfect for:**
- Unified observability stack replacing Prometheus + Loki + Tempo
- Large-scale metrics with high cardinality (millions to billions of time series)
- Large-scale observability platform requiring cost efficiency and scalability
- IoT and edge computing with resource and bandwidth constraints

Learn more in [Why GreptimeDB](https://docs.greptime.com/user-guide/concepts/why-greptimedb) and [Observability 2.0 and the Database for It](https://greptime.com/blogs/2025-04-25-greptimedb-observability2-new-database).

@@ -86,10 +92,10 @@ Learn more in [Why GreptimeDB](https://docs.greptime.com/user-guide/concepts/why
| Feature | GreptimeDB | Traditional TSDB | Log Stores |
|----------------------------------|-----------------------|--------------------|-----------------|
| Data Types | Metrics, Logs, Traces | Metrics only | Logs only |
| Query Language | SQL, PromQL, Streaming| Custom/PromQL | Custom/DSL |
| Query Language | SQL, PromQL | Custom/PromQL | Custom/DSL |
| Deployment | Edge + Cloud | Cloud/On-prem | Mostly central |
| Indexing & Performance | PB-Scale, Sub-second | Varies | Varies |
| Integration | REST, SQL, Common protocols | Varies | Varies |
| Integration | REST API, SQL, Common protocols | Varies | Varies |

**Performance:**
* [GreptimeDB tops JSONBench's billion-record cold run test!](https://greptime.com/blogs/2025-03-18-jsonbench-greptimedb-performance)

@@ -99,22 +105,18 @@ Read [more benchmark reports](https://docs.greptime.com/user-guide/concepts/feat

## Architecture

* Read the [architecture](https://docs.greptime.com/contributor-guide/overview/#architecture) document.
* [DeepWiki](https://deepwiki.com/GreptimeTeam/greptimedb/1-overview) provides an in-depth look at GreptimeDB:
GreptimeDB can run in two modes:
* **Standalone Mode** - Single binary for development and small deployments
* **Distributed Mode** - Separate components for production scale:
  - Frontend: Query processing and protocol handling
  - Datanode: Data storage and retrieval
  - Metasrv: Metadata management and coordination

Read the [architecture](https://docs.greptime.com/contributor-guide/overview/#architecture) document. [DeepWiki](https://deepwiki.com/GreptimeTeam/greptimedb/1-overview) provides an in-depth look at GreptimeDB:
<img alt="GreptimeDB System Overview" src="docs/architecture.png">

## Try GreptimeDB

### 1. [Live Demo](https://greptime.com/playground)

Experience GreptimeDB directly in your browser.

### 2. [GreptimeCloud](https://console.greptime.cloud/)

Start instantly with a free cluster.

### 3. Docker (Local Quickstart)

```shell
docker pull greptime/greptimedb
```

@@ -130,7 +132,8 @@ docker run -p 127.0.0.1:4000-4003:4000-4003 \
  --postgres-addr 0.0.0.0:4003
```
Dashboard: [http://localhost:4000/dashboard](http://localhost:4000/dashboard)
[Full Install Guide](https://docs.greptime.com/getting-started/installation/overview)

Read more in the [full Install Guide](https://docs.greptime.com/getting-started/installation/overview).

**Troubleshooting:**
* Cannot connect to the database? Ensure that ports `4000`, `4001`, `4002`, and `4003` are not blocked by a firewall or used by other services.

@@ -159,21 +162,26 @@ cargo run -- standalone start

## Tools & Extensions

- **Kubernetes:** [GreptimeDB Operator](https://github.com/GrepTimeTeam/greptimedb-operator)
- **Helm Charts:** [Greptime Helm Charts](https://github.com/GreptimeTeam/helm-charts)
- **Dashboard:** [Web UI](https://github.com/GreptimeTeam/dashboard)
- **SDKs/Ingester:** [Go](https://github.com/GreptimeTeam/greptimedb-ingester-go), [Java](https://github.com/GreptimeTeam/greptimedb-ingester-java), [C++](https://github.com/GreptimeTeam/greptimedb-ingester-cpp), [Erlang](https://github.com/GreptimeTeam/greptimedb-ingester-erl), [Rust](https://github.com/GreptimeTeam/greptimedb-ingester-rust), [JS](https://github.com/GreptimeTeam/greptimedb-ingester-js)
- **Grafana**: [Official Dashboard](https://github.com/GreptimeTeam/greptimedb/blob/main/grafana/README.md)
- **Kubernetes**: [GreptimeDB Operator](https://github.com/GrepTimeTeam/greptimedb-operator)
- **Helm Charts**: [Greptime Helm Charts](https://github.com/GreptimeTeam/helm-charts)
- **Dashboard**: [Web UI](https://github.com/GreptimeTeam/dashboard)
- **gRPC Ingester**: [Go](https://github.com/GreptimeTeam/greptimedb-ingester-go), [Java](https://github.com/GreptimeTeam/greptimedb-ingester-java), [C++](https://github.com/GreptimeTeam/greptimedb-ingester-cpp), [Erlang](https://github.com/GreptimeTeam/greptimedb-ingester-erl), [Rust](https://github.com/GreptimeTeam/greptimedb-ingester-rust)
- **Grafana Data Source**: [GreptimeDB Grafana data source plugin](https://github.com/GreptimeTeam/greptimedb-grafana-datasource)
- **Grafana Dashboard**: [Official Dashboard for monitoring](https://github.com/GreptimeTeam/greptimedb/blob/main/grafana/README.md)

## Project Status

> **Status:** Beta.
> **GA (v1.0):** Targeted for mid 2025.
> **Status:** Beta — marching toward v1.0 GA!
> **GA (v1.0):** January 10, 2026

- Being used in production by early adopters
- Deployed in production by open-source projects and commercial users
- Stable, actively maintained, with regular releases ([version info](https://docs.greptime.com/nightly/reference/about-greptimedb-version))
- Suitable for evaluation and pilot deployments

GreptimeDB v1.0 represents a major milestone toward maturity — marking stable APIs, production readiness, and proven performance.

**Roadmap:** Beta1 (Nov 10) → Beta2 (Nov 24) → RC1 (Dec 8) → GA (Jan 10, 2026), please read [v1.0 highlights and release plan](https://greptime.com/blogs/2025-11-05-greptimedb-v1-highlights) for details.

For production use, we recommend using the latest stable release.
[](https://www.star-history.com/#GreptimeTeam/GreptimeDB&Date)

@@ -214,5 +222,5 @@ Special thanks to all contributors! See [AUTHORS.md](https://github.com/Greptime

- Uses [Apache Arrow™](https://arrow.apache.org/) (memory model)
- [Apache Parquet™](https://parquet.apache.org/) (file storage)
- [Apache Arrow DataFusion™](https://arrow.apache.org/datafusion/) (query engine)
- [Apache DataFusion™](https://arrow.apache.org/datafusion/) (query engine)
- [Apache OpenDAL™](https://opendal.apache.org/) (data access abstraction)
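To go with the Docker quickstart and the port troubleshooting note above, a small smoke test once the container is up; this assumes the default HTTP `/health` endpoint on port 4000 and the MySQL protocol listener on 4002, so adjust if your setup differs:

```shell
# HTTP endpoint (dashboard / REST API).
curl -f http://localhost:4000/health

# MySQL protocol listener.
mysql -h 127.0.0.1 -P 4002 -e "SELECT 1;"
```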
@@ -16,7 +16,7 @@
|
||||
| `default_column_prefix` | String | Unset | The default column prefix for auto-created time index and value columns. |
|
||||
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
|
||||
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
|
||||
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
|
||||
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited.<br/>NOTE: This setting affects scan_memory_limit's privileged tier allocation.<br/>When set, 70% of queries get privileged memory access (full scan_memory_limit).<br/>The remaining 30% get standard tier access (70% of scan_memory_limit). |
|
||||
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
|
||||
| `max_in_flight_write_bytes` | String | Unset | The maximum in-flight write bytes. |
|
||||
| `runtime` | -- | -- | The runtime options. |
|
||||
@@ -104,6 +104,7 @@
|
||||
| `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. |
|
||||
| `query` | -- | -- | The query engine options. |
|
||||
| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
|
||||
| `query.memory_pool_size` | String | `50%` | Memory pool size for query execution operators (aggregation, sorting, join).<br/>Supports absolute size (e.g., "2GB", "4GB") or percentage of system memory (e.g., "20%").<br/>Setting it to 0 disables the limit (unbounded, default behavior).<br/>When this limit is reached, queries will fail with ResourceExhausted error.<br/>NOTE: This does NOT limit memory used by table scans. |
|
||||
| `storage` | -- | -- | The data storage options. |
|
||||
| `storage.data_home` | String | `./greptimedb_data` | The working home directory. |
|
||||
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
|
||||
@@ -151,10 +152,13 @@
|
||||
| `region_engine.mito.write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}`. |
|
||||
| `region_engine.mito.write_cache_size` | String | `5GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. |
|
||||
| `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
|
||||
| `region_engine.mito.preload_index_cache` | Bool | `true` | Preload index (puffin) files into cache on region open (default: true).<br/>When enabled, index files are loaded into the write cache during region initialization,<br/>which can improve query performance at the cost of longer startup times. |
|
||||
| `region_engine.mito.index_cache_percent` | Integer | `20` | Percentage of write cache capacity allocated for index (puffin) files (default: 20).<br/>The remaining capacity is used for data (parquet) files.<br/>Must be between 0 and 100 (exclusive). For example, with a 5GiB write cache and 20% allocation,<br/>1GiB is reserved for index files and 4GiB for data files. |
|
||||
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
|
||||
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
|
||||
| `region_engine.mito.max_concurrent_scan_files` | Integer | `384` | Maximum number of SST files to scan concurrently. |
|
||||
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
|
||||
| `region_engine.mito.scan_memory_limit` | String | `50%` | Memory limit for table scans across all queries.<br/>Supports absolute size (e.g., "2GB") or percentage of system memory (e.g., "20%").<br/>Setting it to 0 disables the limit.<br/>NOTE: Works with max_concurrent_queries for tiered memory allocation.<br/>- If max_concurrent_queries is set: 70% of queries get full access, 30% get 70% access.<br/>- If max_concurrent_queries is 0 (unlimited): first 20 queries get full access, rest get 70% access. |
|
||||
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
|
||||
| `region_engine.mito.default_experimental_flat_format` | Bool | `false` | Whether to enable experimental flat format as the default format. |
|
||||
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
|
||||
@@ -188,7 +192,7 @@
|
||||
| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes.<br/>Only available for `partition_tree` memtable. |
|
||||
| `region_engine.file` | -- | -- | Enable the file engine. |
|
||||
| `region_engine.metric` | -- | -- | Metric engine options. |
|
||||
| `region_engine.metric.experimental_sparse_primary_key_encoding` | Bool | `false` | Whether to enable the experimental sparse primary key encoding. |
|
||||
| `region_engine.metric.sparse_primary_key_encoding` | Bool | `true` | Whether to use sparse primary key encoding. |
|
||||
| `logging` | -- | -- | The logging options. |
|
||||
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
|
||||
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
@@ -308,6 +312,7 @@
|
||||
| `query` | -- | -- | The query engine options. |
|
||||
| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
|
||||
| `query.allow_query_fallback` | Bool | `false` | Whether to allow query fallback when push down optimize fails.<br/>Default to false, meaning when push down optimize failed, return error msg |
|
||||
| `query.memory_pool_size` | String | `50%` | Memory pool size for query execution operators (aggregation, sorting, join).<br/>Supports absolute size (e.g., "4GB", "8GB") or percentage of system memory (e.g., "30%").<br/>Setting it to 0 disables the limit (unbounded, default behavior).<br/>When this limit is reached, queries will fail with ResourceExhausted error.<br/>NOTE: This does NOT limit memory used by table scans (only applies to datanodes). |
|
||||
| `datanode` | -- | -- | Datanode options. |
|
||||
| `datanode.client` | -- | -- | Datanode client options. |
|
||||
| `datanode.client.connect_timeout` | String | `10s` | -- |
|
||||
@@ -446,7 +451,7 @@
|
||||
| `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.<br/>It will block the datanode start if it can't receive leases in the heartbeat from metasrv. |
|
||||
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
|
||||
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
|
||||
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
|
||||
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited.<br/>NOTE: This setting affects scan_memory_limit's privileged tier allocation.<br/>When set, 70% of queries get privileged memory access (full scan_memory_limit).<br/>The remaining 30% get standard tier access (70% of scan_memory_limit). |
|
||||
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
|
||||
| `http` | -- | -- | The HTTP server options. |
|
||||
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
|
||||
@@ -500,6 +505,7 @@
|
||||
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
|
||||
| `query` | -- | -- | The query engine options. |
|
||||
| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
|
||||
| `query.memory_pool_size` | String | `50%` | Memory pool size for query execution operators (aggregation, sorting, join).<br/>Supports absolute size (e.g., "2GB", "4GB") or percentage of system memory (e.g., "20%").<br/>Setting it to 0 disables the limit (unbounded, default behavior).<br/>When this limit is reached, queries will fail with ResourceExhausted error.<br/>NOTE: This does NOT limit memory used by table scans. |
|
||||
| `storage` | -- | -- | The data storage options. |
|
||||
| `storage.data_home` | String | `./greptimedb_data` | The working home directory. |
|
||||
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
|
||||
@@ -549,10 +555,13 @@
|
||||
| `region_engine.mito.write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}`. |
|
||||
| `region_engine.mito.write_cache_size` | String | `5GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. |
|
||||
| `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
|
||||
| `region_engine.mito.preload_index_cache` | Bool | `true` | Preload index (puffin) files into cache on region open (default: true).<br/>When enabled, index files are loaded into the write cache during region initialization,<br/>which can improve query performance at the cost of longer startup times. |
| `region_engine.mito.index_cache_percent` | Integer | `20` | Percentage of write cache capacity allocated for index (puffin) files (default: 20).<br/>The remaining capacity is used for data (parquet) files.<br/>Must be between 0 and 100 (exclusive). For example, with a 5GiB write cache and 20% allocation,<br/>1GiB is reserved for index files and 4GiB for data files. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
| `region_engine.mito.max_concurrent_scan_files` | Integer | `384` | Maximum number of SST files to scan concurrently. |
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
| `region_engine.mito.scan_memory_limit` | String | `50%` | Memory limit for table scans across all queries.<br/>Supports absolute size (e.g., "2GB") or percentage of system memory (e.g., "20%").<br/>Setting it to 0 disables the limit.<br/>NOTE: Works with max_concurrent_queries for tiered memory allocation.<br/>- If max_concurrent_queries is set: 70% of queries get full access, 30% get 70% access.<br/>- If max_concurrent_queries is 0 (unlimited): first 20 queries get full access, rest get 70% access.<br/>(See the sketch after this table.) |
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
| `region_engine.mito.default_experimental_flat_format` | Bool | `false` | Whether to enable experimental flat format as the default format. |
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |

@@ -586,7 +595,7 @@

| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes.<br/>Only available for `partition_tree` memtable. |
| `region_engine.file` | -- | -- | Enable the file engine. |
| `region_engine.metric` | -- | -- | Metric engine options. |
| `region_engine.metric.experimental_sparse_primary_key_encoding` | Bool | `false` | Whether to enable the experimental sparse primary key encoding. |
| `region_engine.metric.sparse_primary_key_encoding` | Bool | `true` | Whether to use sparse primary key encoding. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |

@@ -673,5 +682,6 @@

| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
| `query` | -- | -- | -- |
| `query.parallelism` | Integer | `1` | Parallelism of the query engine for queries sent by the flownode.<br/>Defaults to 1 so that it won't use too much CPU or memory. |
| `query.memory_pool_size` | String | `50%` | Memory pool size for query execution operators (aggregation, sorting, join).<br/>Supports absolute size (e.g., "1GB", "2GB") or percentage of system memory (e.g., "20%").<br/>Setting it to 0 disables the limit (unbounded, default behavior).<br/>When this limit is reached, queries will fail with ResourceExhausted error.<br/>NOTE: This does NOT limit memory used by table scans. |
| `memory` | -- | -- | The memory options. |
| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.<br/>When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable<br/>is set to "prof:true,prof_active:false". The official image adds this env variable.<br/>Default is true. |
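The tiered allocation described for `region_engine.mito.scan_memory_limit` and `max_concurrent_queries` above is easiest to see as plain arithmetic. The following self-contained Rust sketch is illustrative only (the function names are invented for this example and are not GreptimeDB APIs); it computes how many queries land in the privileged tier and what fraction of the scan limit each tier may draw from, following the 70%/30% and first-20-queries rules quoted above.

```rust
/// Illustrative sketch of the tiering rules quoted above; not the actual
/// GreptimeDB implementation.
fn privileged_query_slots(max_concurrent_queries: usize) -> usize {
    if max_concurrent_queries == 0 {
        // Unlimited concurrency: only the first 20 queries are privileged.
        20
    } else {
        // Otherwise 70% of the allowed concurrent queries are privileged.
        max_concurrent_queries * 70 / 100
    }
}

/// Fraction of `scan_memory_limit` (in bytes) a query in the given tier
/// may draw from.
fn tier_scan_limit(scan_memory_limit: u64, privileged: bool) -> u64 {
    if privileged {
        scan_memory_limit // full access to the configured limit
    } else {
        scan_memory_limit * 70 / 100 // standard tier: 70% of the limit
    }
}

fn main() {
    let limit = 8u64 * 1024 * 1024 * 1024; // e.g. an 8 GiB scan limit
    println!("privileged slots (max_concurrent_queries = 100): {}", privileged_query_slots(100));
    println!("privileged slots (unlimited): {}", privileged_query_slots(0));
    println!("privileged tier limit: {} bytes", tier_scan_limit(limit, true));
    println!("standard tier limit:   {} bytes", tier_scan_limit(limit, false));
}
```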
@@ -18,6 +18,9 @@ init_regions_in_background = false
init_regions_parallelism = 16

## The maximum number of concurrent queries allowed to be executed. Zero means unlimited.
## NOTE: This setting affects scan_memory_limit's privileged tier allocation.
## When set, 70% of queries get privileged memory access (full scan_memory_limit).
## The remaining 30% get standard tier access (70% of scan_memory_limit).
max_concurrent_queries = 0

## Enable telemetry to collect anonymous usage data. Enabled by default.
@@ -261,6 +264,13 @@ overwrite_entry_start_id = false
## Default to 0, which means the number of CPU cores.
parallelism = 0

## Memory pool size for query execution operators (aggregation, sorting, join).
## Supports absolute size (e.g., "2GB", "4GB") or percentage of system memory (e.g., "20%").
## Setting it to 0 disables the limit (unbounded, default behavior).
## When this limit is reached, queries will fail with ResourceExhausted error.
## NOTE: This does NOT limit memory used by table scans.
memory_pool_size = "50%"
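As a concrete illustration of how a value such as `"50%"` or `"2GB"` turns into a byte budget, here is a minimal, self-contained Rust sketch. It is not GreptimeDB's option parser (the real one accepts more size formats); it assumes the caller supplies the total system memory and only recognizes the `%`, `GB`, and `MB` suffixes used in the comments above, treating them as binary units.

```rust
/// Parse a memory limit such as "2GB", "512MB", "50%" or "0" into bytes.
/// Illustrative sketch only: the suffix set is limited to the forms used in
/// the comments above, and "GB"/"MB" are treated as GiB/MiB for simplicity.
fn parse_memory_limit(value: &str, total_memory_bytes: u64) -> Option<u64> {
    let value = value.trim();
    if let Some(percent) = value.strip_suffix('%') {
        let percent: u64 = percent.trim().parse().ok()?;
        return Some(total_memory_bytes * percent / 100);
    }
    if let Some(gb) = value.strip_suffix("GB") {
        let gb: u64 = gb.trim().parse().ok()?;
        return Some(gb * 1024 * 1024 * 1024);
    }
    if let Some(mb) = value.strip_suffix("MB") {
        let mb: u64 = mb.trim().parse().ok()?;
        return Some(mb * 1024 * 1024);
    }
    // A bare "0" (or any plain integer) is taken as a byte count; 0 disables the limit.
    value.parse::<u64>().ok()
}

fn main() {
    let total = 32u64 * 1024 * 1024 * 1024; // assume a host with 32 GiB of memory
    assert_eq!(parse_memory_limit("50%", total), Some(total / 2));
    assert_eq!(parse_memory_limit("2GB", total), Some(2 * 1024 * 1024 * 1024));
    assert_eq!(parse_memory_limit("0", total), Some(0));
}
```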
## The data storage options.
[storage]
## The working home directory.
@@ -489,6 +499,17 @@ write_cache_size = "5GiB"
## @toml2docs:none-default
write_cache_ttl = "8h"

## Preload index (puffin) files into cache on region open (default: true).
## When enabled, index files are loaded into the write cache during region initialization,
## which can improve query performance at the cost of longer startup times.
preload_index_cache = true

## Percentage of write cache capacity allocated for index (puffin) files (default: 20).
## The remaining capacity is used for data (parquet) files.
## Must be between 0 and 100 (exclusive). For example, with a 5GiB write cache and 20% allocation,
## 1GiB is reserved for index files and 4GiB for data files.
index_cache_percent = 20
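To make the index/data split above concrete, here is a small standalone Rust sketch (illustrative arithmetic only, not the engine's code): with a 5 GiB write cache and `index_cache_percent = 20`, puffin index files get 1 GiB and parquet data files get the remaining 4 GiB.

```rust
/// Split the write cache capacity between index (puffin) and data (parquet)
/// files. Illustrative sketch of the arithmetic described above; the engine
/// additionally requires the percentage to be strictly between 0 and 100.
fn split_write_cache(write_cache_size: u64, index_cache_percent: u64) -> (u64, u64) {
    assert!(index_cache_percent > 0 && index_cache_percent < 100);
    let index_bytes = write_cache_size * index_cache_percent / 100;
    let data_bytes = write_cache_size - index_bytes;
    (index_bytes, data_bytes)
}

fn main() {
    let five_gib = 5u64 * 1024 * 1024 * 1024;
    let (index, data) = split_write_cache(five_gib, 20);
    assert_eq!(index, 1024 * 1024 * 1024);     // 1 GiB for puffin index files
    assert_eq!(data, 4 * 1024 * 1024 * 1024);  // 4 GiB for parquet data files
    println!("index cache: {index} bytes, data cache: {data} bytes");
}
```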
## Buffer size for SST writing.
sst_write_buffer_size = "8MB"
@@ -501,6 +522,14 @@ max_concurrent_scan_files = 384
|
||||
## Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
|
||||
## Memory limit for table scans across all queries.
|
||||
## Supports absolute size (e.g., "2GB") or percentage of system memory (e.g., "20%").
|
||||
## Setting it to 0 disables the limit.
|
||||
## NOTE: Works with max_concurrent_queries for tiered memory allocation.
|
||||
## - If max_concurrent_queries is set: 70% of queries get full access, 30% get 70% access.
|
||||
## - If max_concurrent_queries is 0 (unlimited): first 20 queries get full access, rest get 70% access.
|
||||
scan_memory_limit = "50%"
|
||||
|
||||
## Minimum time interval between two compactions.
|
||||
## To align with the old behavior, the default value is 0 (no restrictions).
|
||||
min_compaction_interval = "0m"
|
||||
@@ -640,8 +669,8 @@ fork_dictionary_bytes = "1GiB"
|
||||
[[region_engine]]
|
||||
## Metric engine options.
|
||||
[region_engine.metric]
|
||||
## Whether to enable the experimental sparse primary key encoding.
|
||||
experimental_sparse_primary_key_encoding = false
|
||||
## Whether to use sparse primary key encoding.
|
||||
sparse_primary_key_encoding = true
|
||||
|
||||
## The logging options.
|
||||
[logging]
|
||||
|
||||
@@ -158,6 +158,13 @@ default_ratio = 1.0
|
||||
## Defaults to 1 so that it won't use too much CPU or memory
|
||||
parallelism = 1
|
||||
|
||||
## Memory pool size for query execution operators (aggregation, sorting, join).
|
||||
## Supports absolute size (e.g., "1GB", "2GB") or percentage of system memory (e.g., "20%").
|
||||
## Setting it to 0 disables the limit (unbounded, default behavior).
|
||||
## When this limit is reached, queries will fail with ResourceExhausted error.
|
||||
## NOTE: This does NOT limit memory used by table scans.
|
||||
memory_pool_size = "50%"
|
||||
|
||||
## The memory options.
|
||||
[memory]
|
||||
## Whether to enable heap profiling activation during startup.
|
||||
|
||||
@@ -256,6 +256,13 @@ parallelism = 0
|
||||
## Defaults to false, meaning that when push-down optimization fails, an error message is returned
|
||||
allow_query_fallback = false
|
||||
|
||||
## Memory pool size for query execution operators (aggregation, sorting, join).
|
||||
## Supports absolute size (e.g., "4GB", "8GB") or percentage of system memory (e.g., "30%").
|
||||
## Setting it to 0 disables the limit (unbounded, default behavior).
|
||||
## When this limit is reached, queries will fail with ResourceExhausted error.
|
||||
## NOTE: This does NOT limit memory used by table scans (only applies to datanodes).
|
||||
memory_pool_size = "50%"
|
||||
|
||||
## Datanode options.
|
||||
[datanode]
|
||||
## Datanode client options.
|
||||
|
||||
@@ -14,6 +14,9 @@ init_regions_in_background = false
|
||||
init_regions_parallelism = 16
|
||||
|
||||
## The maximum number of concurrent queries allowed to be executed. Zero means unlimited.
|
||||
## NOTE: This setting affects scan_memory_limit's privileged tier allocation.
|
||||
## When set, 70% of queries get privileged memory access (full scan_memory_limit).
|
||||
## The remaining 30% get standard tier access (70% of scan_memory_limit).
|
||||
max_concurrent_queries = 0
|
||||
|
||||
## Enable telemetry to collect anonymous usage data. Enabled by default.
|
||||
@@ -365,6 +368,13 @@ max_running_procedures = 128
|
||||
## Default to 0, which means the number of CPU cores.
|
||||
parallelism = 0
|
||||
|
||||
## Memory pool size for query execution operators (aggregation, sorting, join).
|
||||
## Supports absolute size (e.g., "2GB", "4GB") or percentage of system memory (e.g., "20%").
|
||||
## Setting it to 0 disables the limit (unbounded, default behavior).
|
||||
## When this limit is reached, queries will fail with ResourceExhausted error.
|
||||
## NOTE: This does NOT limit memory used by table scans.
|
||||
memory_pool_size = "50%"
|
||||
|
||||
## The data storage options.
|
||||
[storage]
|
||||
## The working home directory.
|
||||
@@ -580,6 +590,17 @@ write_cache_size = "5GiB"
|
||||
## @toml2docs:none-default
|
||||
write_cache_ttl = "8h"
|
||||
|
||||
## Preload index (puffin) files into cache on region open (default: true).
|
||||
## When enabled, index files are loaded into the write cache during region initialization,
|
||||
## which can improve query performance at the cost of longer startup times.
|
||||
preload_index_cache = true
|
||||
|
||||
## Percentage of write cache capacity allocated for index (puffin) files (default: 20).
|
||||
## The remaining capacity is used for data (parquet) files.
|
||||
## Must be between 0 and 100 (exclusive). For example, with a 5GiB write cache and 20% allocation,
|
||||
## 1GiB is reserved for index files and 4GiB for data files.
|
||||
index_cache_percent = 20
|
||||
|
||||
## Buffer size for SST writing.
|
||||
sst_write_buffer_size = "8MB"
|
||||
|
||||
@@ -592,6 +613,14 @@ max_concurrent_scan_files = 384
|
||||
## Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
|
||||
## Memory limit for table scans across all queries.
|
||||
## Supports absolute size (e.g., "2GB") or percentage of system memory (e.g., "20%").
|
||||
## Setting it to 0 disables the limit.
|
||||
## NOTE: Works with max_concurrent_queries for tiered memory allocation.
|
||||
## - If max_concurrent_queries is set: 70% of queries get full access, 30% get 70% access.
|
||||
## - If max_concurrent_queries is 0 (unlimited): first 20 queries get full access, rest get 70% access.
|
||||
scan_memory_limit = "50%"
|
||||
|
||||
## Minimum time interval between two compactions.
|
||||
## To align with the old behavior, the default value is 0 (no restrictions).
|
||||
min_compaction_interval = "0m"
|
||||
@@ -731,8 +760,8 @@ fork_dictionary_bytes = "1GiB"
|
||||
[[region_engine]]
|
||||
## Metric engine options.
|
||||
[region_engine.metric]
|
||||
## Whether to enable the experimental sparse primary key encoding.
|
||||
experimental_sparse_primary_key_encoding = false
|
||||
## Whether to use sparse primary key encoding.
|
||||
sparse_primary_key_encoding = true
|
||||
|
||||
## The logging options.
|
||||
[logging]
|
||||
|
||||
@@ -8,6 +8,7 @@ license.workspace = true
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
arrow-schema.workspace = true
|
||||
common-base.workspace = true
|
||||
common-decimal.workspace = true
|
||||
common-error.workspace = true
|
||||
|
||||
@@ -14,10 +14,11 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use arrow_schema::extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY};
|
||||
use datatypes::schema::{
|
||||
COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema, FULLTEXT_KEY, FulltextAnalyzer,
|
||||
FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, JSON_STRUCTURE_SETTINGS_KEY,
|
||||
SKIPPING_INDEX_KEY, SkippingIndexOptions, SkippingIndexType,
|
||||
FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY, SkippingIndexOptions,
|
||||
SkippingIndexType,
|
||||
};
|
||||
use greptime_proto::v1::{
|
||||
Analyzer, FulltextBackend as PbFulltextBackend, SkippingIndexType as PbSkippingIndexType,
|
||||
@@ -68,8 +69,14 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
|
||||
if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) {
|
||||
metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.to_owned());
|
||||
}
|
||||
if let Some(settings) = options.options.get(JSON_STRUCTURE_SETTINGS_KEY) {
|
||||
metadata.insert(JSON_STRUCTURE_SETTINGS_KEY.to_string(), settings.clone());
|
||||
if let Some(extension_name) = options.options.get(EXTENSION_TYPE_NAME_KEY) {
|
||||
metadata.insert(EXTENSION_TYPE_NAME_KEY.to_string(), extension_name.clone());
|
||||
}
|
||||
if let Some(extension_metadata) = options.options.get(EXTENSION_TYPE_METADATA_KEY) {
|
||||
metadata.insert(
|
||||
EXTENSION_TYPE_METADATA_KEY.to_string(),
|
||||
extension_metadata.clone(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,10 +149,16 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<Column
|
||||
.options
|
||||
.insert(SKIPPING_INDEX_GRPC_KEY.to_string(), skipping_index.clone());
|
||||
}
|
||||
if let Some(settings) = column_schema.metadata().get(JSON_STRUCTURE_SETTINGS_KEY) {
|
||||
if let Some(extension_name) = column_schema.metadata().get(EXTENSION_TYPE_NAME_KEY) {
|
||||
options
|
||||
.options
|
||||
.insert(JSON_STRUCTURE_SETTINGS_KEY.to_string(), settings.clone());
|
||||
.insert(EXTENSION_TYPE_NAME_KEY.to_string(), extension_name.clone());
|
||||
}
|
||||
if let Some(extension_metadata) = column_schema.metadata().get(EXTENSION_TYPE_METADATA_KEY) {
|
||||
options.options.insert(
|
||||
EXTENSION_TYPE_METADATA_KEY.to_string(),
|
||||
extension_metadata.clone(),
|
||||
);
|
||||
}
|
||||
|
||||
(!options.options.is_empty()).then_some(options)
|
||||
|
||||
@@ -29,6 +29,7 @@ use crate::information_schema::{InformationExtensionRef, InformationSchemaProvid
|
||||
use crate::kvbackend::KvBackendCatalogManager;
|
||||
use crate::kvbackend::manager::{CATALOG_CACHE_MAX_CAPACITY, SystemCatalog};
|
||||
use crate::process_manager::ProcessManagerRef;
|
||||
use crate::system_schema::numbers_table_provider::NumbersTableProvider;
|
||||
use crate::system_schema::pg_catalog::PGCatalogProvider;
|
||||
|
||||
pub struct KvBackendCatalogManagerBuilder {
|
||||
@@ -119,6 +120,7 @@ impl KvBackendCatalogManagerBuilder {
|
||||
DEFAULT_CATALOG_NAME.to_string(),
|
||||
me.clone(),
|
||||
)),
|
||||
numbers_table_provider: NumbersTableProvider,
|
||||
backend,
|
||||
process_manager,
|
||||
#[cfg(feature = "enterprise")]
|
||||
|
||||
@@ -18,8 +18,7 @@ use std::sync::{Arc, Weak};
|
||||
|
||||
use async_stream::try_stream;
|
||||
use common_catalog::consts::{
|
||||
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID,
|
||||
PG_CATALOG_NAME,
|
||||
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME,
|
||||
};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cache::{
|
||||
@@ -45,7 +44,6 @@ use table::TableRef;
|
||||
use table::dist_table::DistTable;
|
||||
use table::metadata::{TableId, TableInfoRef};
|
||||
use table::table::PartitionRules;
|
||||
use table::table::numbers::{NUMBERS_TABLE_NAME, NumbersTable};
|
||||
use table::table_name::TableName;
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
@@ -61,6 +59,7 @@ use crate::information_schema::{InformationExtensionRef, InformationSchemaProvid
|
||||
use crate::kvbackend::TableCacheRef;
|
||||
use crate::process_manager::ProcessManagerRef;
|
||||
use crate::system_schema::SystemSchemaProvider;
|
||||
use crate::system_schema::numbers_table_provider::NumbersTableProvider;
|
||||
use crate::system_schema::pg_catalog::PGCatalogProvider;
|
||||
|
||||
/// Access all existing catalog, schema and tables.
|
||||
@@ -555,6 +554,7 @@ pub(super) struct SystemCatalog {
|
||||
// system_schema_provider for default catalog
|
||||
pub(super) information_schema_provider: Arc<InformationSchemaProvider>,
|
||||
pub(super) pg_catalog_provider: Arc<PGCatalogProvider>,
|
||||
pub(super) numbers_table_provider: NumbersTableProvider,
|
||||
pub(super) backend: KvBackendRef,
|
||||
pub(super) process_manager: Option<ProcessManagerRef>,
|
||||
#[cfg(feature = "enterprise")]
|
||||
@@ -584,9 +584,7 @@ impl SystemCatalog {
|
||||
PG_CATALOG_NAME if channel == Channel::Postgres => {
|
||||
self.pg_catalog_provider.table_names()
|
||||
}
|
||||
DEFAULT_SCHEMA_NAME => {
|
||||
vec![NUMBERS_TABLE_NAME.to_string()]
|
||||
}
|
||||
DEFAULT_SCHEMA_NAME => self.numbers_table_provider.table_names(),
|
||||
_ => vec![],
|
||||
}
|
||||
}
|
||||
@@ -604,7 +602,7 @@ impl SystemCatalog {
|
||||
if schema == INFORMATION_SCHEMA_NAME {
|
||||
self.information_schema_provider.table(table).is_some()
|
||||
} else if schema == DEFAULT_SCHEMA_NAME {
|
||||
table == NUMBERS_TABLE_NAME
|
||||
self.numbers_table_provider.table_exists(table)
|
||||
} else if schema == PG_CATALOG_NAME && channel == Channel::Postgres {
|
||||
self.pg_catalog_provider.table(table).is_some()
|
||||
} else {
|
||||
@@ -649,8 +647,8 @@ impl SystemCatalog {
|
||||
});
|
||||
pg_catalog_provider.table(table_name)
|
||||
}
|
||||
} else if schema == DEFAULT_SCHEMA_NAME && table_name == NUMBERS_TABLE_NAME {
|
||||
Some(NumbersTable::table(NUMBERS_TABLE_ID))
|
||||
} else if schema == DEFAULT_SCHEMA_NAME {
|
||||
self.numbers_table_provider.table(table_name)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
pub mod information_schema;
|
||||
mod memory_table;
|
||||
pub mod numbers_table_provider;
|
||||
pub mod pg_catalog;
|
||||
pub mod predicate;
|
||||
mod utils;
|
||||
|
||||
@@ -97,7 +97,6 @@ lazy_static! {
|
||||
ROUTINES,
|
||||
SCHEMA_PRIVILEGES,
|
||||
TABLE_PRIVILEGES,
|
||||
TRIGGERS,
|
||||
GLOBAL_STATUS,
|
||||
SESSION_STATUS,
|
||||
PARTITIONS,
|
||||
@@ -207,7 +206,6 @@ impl SystemSchemaProviderInner for InformationSchemaProvider {
|
||||
ROUTINES => setup_memory_table!(ROUTINES),
|
||||
SCHEMA_PRIVILEGES => setup_memory_table!(SCHEMA_PRIVILEGES),
|
||||
TABLE_PRIVILEGES => setup_memory_table!(TABLE_PRIVILEGES),
|
||||
TRIGGERS => setup_memory_table!(TRIGGERS),
|
||||
GLOBAL_STATUS => setup_memory_table!(GLOBAL_STATUS),
|
||||
SESSION_STATUS => setup_memory_table!(SESSION_STATUS),
|
||||
KEY_COLUMN_USAGE => Some(Arc::new(InformationSchemaKeyColumnUsage::new(
|
||||
|
||||
@@ -15,8 +15,7 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_catalog::consts::{METRIC_ENGINE, MITO_ENGINE};
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::schema::{Schema, SchemaRef};
|
||||
use datatypes::vectors::{Int64Vector, StringVector, VectorRef};
|
||||
|
||||
use crate::system_schema::information_schema::table_names::*;
|
||||
@@ -366,16 +365,6 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
|
||||
vec![],
|
||||
),
|
||||
|
||||
TRIGGERS => (
|
||||
vec![
|
||||
string_column("TRIGGER_NAME"),
|
||||
ColumnSchema::new("trigger_id", ConcreteDataType::uint64_datatype(), false),
|
||||
string_column("TRIGGER_DEFINITION"),
|
||||
ColumnSchema::new("flownode_id", ConcreteDataType::uint64_datatype(), true),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
// TODO: Considering store internal metrics in `global_status` and
|
||||
// `session_status` tables.
|
||||
GLOBAL_STATUS => (
|
||||
|
||||
59 src/catalog/src/system_schema/numbers_table_provider.rs (new file)
@@ -0,0 +1,59 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#[cfg(any(test, feature = "testing", debug_assertions))]
|
||||
use common_catalog::consts::NUMBERS_TABLE_ID;
|
||||
use table::TableRef;
|
||||
#[cfg(any(test, feature = "testing", debug_assertions))]
|
||||
use table::table::numbers::NUMBERS_TABLE_NAME;
|
||||
#[cfg(any(test, feature = "testing", debug_assertions))]
|
||||
use table::table::numbers::NumbersTable;
|
||||
|
||||
// NumbersTableProvider is a dedicated provider for feature-gating the numbers table.
|
||||
#[derive(Clone)]
|
||||
pub struct NumbersTableProvider;
|
||||
|
||||
#[cfg(any(test, feature = "testing", debug_assertions))]
|
||||
impl NumbersTableProvider {
|
||||
pub(crate) fn table_exists(&self, name: &str) -> bool {
|
||||
name == NUMBERS_TABLE_NAME
|
||||
}
|
||||
|
||||
pub(crate) fn table_names(&self) -> Vec<String> {
|
||||
vec![NUMBERS_TABLE_NAME.to_string()]
|
||||
}
|
||||
|
||||
pub(crate) fn table(&self, name: &str) -> Option<TableRef> {
|
||||
if name == NUMBERS_TABLE_NAME {
|
||||
Some(NumbersTable::table(NUMBERS_TABLE_ID))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(any(test, feature = "testing", debug_assertions)))]
|
||||
impl NumbersTableProvider {
|
||||
pub(crate) fn table_exists(&self, _name: &str) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
pub(crate) fn table_names(&self) -> Vec<String> {
|
||||
vec![]
|
||||
}
|
||||
|
||||
pub(crate) fn table(&self, _name: &str) -> Option<TableRef> {
|
||||
None
|
||||
}
|
||||
}
|
||||
@@ -16,12 +16,15 @@ mod export;
|
||||
mod import;
|
||||
|
||||
use clap::Subcommand;
|
||||
use client::DEFAULT_CATALOG_NAME;
|
||||
use common_error::ext::BoxedError;
|
||||
|
||||
use crate::Tool;
|
||||
use crate::data::export::ExportCommand;
|
||||
use crate::data::import::ImportCommand;
|
||||
|
||||
pub(crate) const COPY_PATH_PLACEHOLDER: &str = "<PATH/TO/FILES>";
|
||||
|
||||
/// Command for data operations including exporting data from and importing data into GreptimeDB.
|
||||
#[derive(Subcommand)]
|
||||
pub enum DataCommand {
|
||||
@@ -37,3 +40,7 @@ impl DataCommand {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn default_database() -> String {
|
||||
format!("{DEFAULT_CATALOG_NAME}-*")
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ use snafu::{OptionExt, ResultExt};
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio::time::Instant;
|
||||
|
||||
use crate::data::{COPY_PATH_PLACEHOLDER, default_database};
|
||||
use crate::database::{DatabaseClient, parse_proxy_opts};
|
||||
use crate::error::{
|
||||
EmptyResultSnafu, Error, OpenDalSnafu, OutputDirNotSetSnafu, Result, S3ConfigNotSetSnafu,
|
||||
@@ -63,7 +64,7 @@ pub struct ExportCommand {
|
||||
output_dir: Option<String>,
|
||||
|
||||
/// The name of the catalog to export.
|
||||
#[clap(long, default_value = "greptime-*")]
|
||||
#[clap(long, default_value_t = default_database())]
|
||||
database: String,
|
||||
|
||||
/// Parallelism of the export.
|
||||
@@ -667,10 +668,26 @@ impl Export {
|
||||
);
|
||||
|
||||
// Create copy_from.sql file
|
||||
let copy_database_from_sql = format!(
|
||||
r#"COPY DATABASE "{}"."{}" FROM '{}' WITH ({}){};"#,
|
||||
export_self.catalog, schema, path, with_options_clone, connection_part
|
||||
);
|
||||
let copy_database_from_sql = {
|
||||
let command_without_connection = format!(
|
||||
r#"COPY DATABASE "{}"."{}" FROM '{}' WITH ({});"#,
|
||||
export_self.catalog, schema, COPY_PATH_PLACEHOLDER, with_options_clone
|
||||
);
|
||||
|
||||
if connection_part.is_empty() {
|
||||
command_without_connection
|
||||
} else {
|
||||
let command_with_connection = format!(
|
||||
r#"COPY DATABASE "{}"."{}" FROM '{}' WITH ({}){};"#,
|
||||
export_self.catalog, schema, path, with_options_clone, connection_part
|
||||
);
|
||||
|
||||
format!(
|
||||
"-- {}\n{}",
|
||||
command_with_connection, command_without_connection
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
let copy_from_path = export_self.get_file_path(&schema, "copy_from.sql");
|
||||
export_self
|
||||
|
||||
@@ -21,12 +21,13 @@ use clap::{Parser, ValueEnum};
|
||||
use common_catalog::consts::DEFAULT_SCHEMA_NAME;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_telemetry::{error, info, warn};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use snafu::{OptionExt, ResultExt, ensure};
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio::time::Instant;
|
||||
|
||||
use crate::data::{COPY_PATH_PLACEHOLDER, default_database};
|
||||
use crate::database::{DatabaseClient, parse_proxy_opts};
|
||||
use crate::error::{Error, FileIoSnafu, Result, SchemaNotFoundSnafu};
|
||||
use crate::error::{Error, FileIoSnafu, InvalidArgumentsSnafu, Result, SchemaNotFoundSnafu};
|
||||
use crate::{Tool, database};
|
||||
|
||||
#[derive(Debug, Default, Clone, ValueEnum)]
|
||||
@@ -52,7 +53,7 @@ pub struct ImportCommand {
|
||||
input_dir: String,
|
||||
|
||||
/// The name of the catalog to import.
|
||||
#[clap(long, default_value = "greptime-*")]
|
||||
#[clap(long, default_value_t = default_database())]
|
||||
database: String,
|
||||
|
||||
/// Parallelism of the import.
|
||||
@@ -147,12 +148,15 @@ impl Import {
|
||||
let _permit = semaphore_moved.acquire().await.unwrap();
|
||||
let database_input_dir = self.catalog_path().join(&schema);
|
||||
let sql_file = database_input_dir.join(filename);
|
||||
let sql = tokio::fs::read_to_string(sql_file)
|
||||
let mut sql = tokio::fs::read_to_string(sql_file)
|
||||
.await
|
||||
.context(FileIoSnafu)?;
|
||||
if sql.is_empty() {
|
||||
if sql.trim().is_empty() {
|
||||
info!("Empty `{filename}` {database_input_dir:?}");
|
||||
} else {
|
||||
if filename == "copy_from.sql" {
|
||||
sql = self.rewrite_copy_database_sql(&schema, &sql)?;
|
||||
}
|
||||
let db = exec_db.unwrap_or(&schema);
|
||||
self.database_client.sql(&sql, db).await?;
|
||||
info!("Imported `{filename}` for database {schema}");
|
||||
@@ -225,6 +229,57 @@ impl Import {
|
||||
}
|
||||
Ok(db_names)
|
||||
}
|
||||
|
||||
fn rewrite_copy_database_sql(&self, schema: &str, sql: &str) -> Result<String> {
|
||||
let target_location = self.build_copy_database_location(schema);
|
||||
let escaped_location = target_location.replace('\'', "''");
|
||||
|
||||
let mut first_stmt_checked = false;
|
||||
for line in sql.lines() {
|
||||
let trimmed = line.trim_start();
|
||||
if trimmed.is_empty() || trimmed.starts_with("--") {
|
||||
continue;
|
||||
}
|
||||
|
||||
ensure!(
|
||||
trimmed.starts_with("COPY DATABASE"),
|
||||
InvalidArgumentsSnafu {
|
||||
msg: "Expected COPY DATABASE statement at start of copy_from.sql"
|
||||
}
|
||||
);
|
||||
first_stmt_checked = true;
|
||||
break;
|
||||
}
|
||||
|
||||
ensure!(
|
||||
first_stmt_checked,
|
||||
InvalidArgumentsSnafu {
|
||||
msg: "COPY DATABASE statement not found in copy_from.sql"
|
||||
}
|
||||
);
|
||||
|
||||
ensure!(
|
||||
sql.contains(COPY_PATH_PLACEHOLDER),
|
||||
InvalidArgumentsSnafu {
|
||||
msg: format!(
|
||||
"Placeholder `{}` not found in COPY DATABASE statement",
|
||||
COPY_PATH_PLACEHOLDER
|
||||
)
|
||||
}
|
||||
);
|
||||
|
||||
Ok(sql.replacen(COPY_PATH_PLACEHOLDER, &escaped_location, 1))
|
||||
}
|
||||
|
||||
fn build_copy_database_location(&self, schema: &str) -> String {
|
||||
let mut path = self.catalog_path();
|
||||
path.push(schema);
|
||||
let mut path_str = path.to_string_lossy().into_owned();
|
||||
if !path_str.ends_with('/') {
|
||||
path_str.push('/');
|
||||
}
|
||||
path_str
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -240,3 +295,52 @@ impl Tool for Import {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::time::Duration;
|
||||
|
||||
use super::*;
|
||||
|
||||
fn build_import(input_dir: &str) -> Import {
|
||||
Import {
|
||||
catalog: "catalog".to_string(),
|
||||
schema: None,
|
||||
database_client: DatabaseClient::new(
|
||||
"127.0.0.1:4000".to_string(),
|
||||
"catalog".to_string(),
|
||||
None,
|
||||
Duration::from_secs(0),
|
||||
None,
|
||||
),
|
||||
input_dir: input_dir.to_string(),
|
||||
parallelism: 1,
|
||||
target: ImportTarget::Data,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rewrite_copy_database_sql_replaces_placeholder() {
|
||||
let import = build_import("/tmp/export-path");
|
||||
let comment = "-- COPY DATABASE \"catalog\".\"schema\" FROM 's3://bucket/demo/' WITH (format = 'parquet') CONNECTION (region = 'us-west-2')";
|
||||
let sql = format!(
|
||||
"{comment}\nCOPY DATABASE \"catalog\".\"schema\" FROM '{}' WITH (format = 'parquet');",
|
||||
COPY_PATH_PLACEHOLDER
|
||||
);
|
||||
|
||||
let rewritten = import.rewrite_copy_database_sql("schema", &sql).unwrap();
|
||||
let expected_location = import.build_copy_database_location("schema");
|
||||
let escaped = expected_location.replace('\'', "''");
|
||||
|
||||
assert!(rewritten.starts_with(comment));
|
||||
assert!(rewritten.contains(&format!("FROM '{escaped}'")));
|
||||
assert!(!rewritten.contains(COPY_PATH_PLACEHOLDER));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rewrite_copy_database_sql_requires_placeholder() {
|
||||
let import = build_import("/tmp/export-path");
|
||||
let sql = "COPY DATABASE \"catalog\".\"schema\" FROM '/tmp/export-path/catalog/schema/' WITH (format = 'parquet');";
|
||||
assert!(import.rewrite_copy_database_sql("schema", sql).is_err());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,7 +20,9 @@ use api::v1::health_check_client::HealthCheckClient;
|
||||
use api::v1::prometheus_gateway_client::PrometheusGatewayClient;
|
||||
use api::v1::region::region_client::RegionClient as PbRegionClient;
|
||||
use arrow_flight::flight_service_client::FlightServiceClient;
|
||||
use common_grpc::channel_manager::{ChannelConfig, ChannelManager, ClientTlsOption};
|
||||
use common_grpc::channel_manager::{
|
||||
ChannelConfig, ChannelManager, ClientTlsOption, load_tls_config,
|
||||
};
|
||||
use parking_lot::RwLock;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use tonic::codec::CompressionEncoding;
|
||||
@@ -94,8 +96,9 @@ impl Client {
|
||||
A: AsRef<[U]>,
|
||||
{
|
||||
let channel_config = ChannelConfig::default().client_tls_config(client_tls);
|
||||
let channel_manager = ChannelManager::with_tls_config(channel_config)
|
||||
let tls_config = load_tls_config(channel_config.client_tls.as_ref())
|
||||
.context(error::CreateTlsChannelSnafu)?;
|
||||
let channel_manager = ChannelManager::with_config(channel_config, tls_config);
|
||||
Ok(Self::with_manager_and_urls(channel_manager, urls))
|
||||
}
|
||||
|
||||
|
||||
@@ -74,7 +74,7 @@ impl FlownodeManager for NodeClients {
|
||||
impl NodeClients {
|
||||
pub fn new(config: ChannelConfig) -> Self {
|
||||
Self {
|
||||
channel_manager: ChannelManager::with_config(config),
|
||||
channel_manager: ChannelManager::with_config(config, None),
|
||||
clients: CacheBuilder::new(1024)
|
||||
.time_to_live(Duration::from_secs(30 * 60))
|
||||
.time_to_idle(Duration::from_secs(5 * 60))
|
||||
|
||||
@@ -29,9 +29,11 @@ base64.workspace = true
|
||||
cache.workspace = true
|
||||
catalog.workspace = true
|
||||
chrono.workspace = true
|
||||
either = "1.15"
|
||||
clap.workspace = true
|
||||
cli.workspace = true
|
||||
client.workspace = true
|
||||
colored = "2.1.0"
|
||||
common-base.workspace = true
|
||||
common-catalog.workspace = true
|
||||
common-config.workspace = true
|
||||
@@ -63,9 +65,11 @@ lazy_static.workspace = true
|
||||
meta-client.workspace = true
|
||||
meta-srv.workspace = true
|
||||
metric-engine.workspace = true
|
||||
mito2.workspace = true
|
||||
moka.workspace = true
|
||||
nu-ansi-term = "0.46"
|
||||
object-store.workspace = true
|
||||
parquet = { workspace = true, features = ["object_store"] }
|
||||
plugins.workspace = true
|
||||
prometheus.workspace = true
|
||||
prost.workspace = true
|
||||
@@ -88,6 +92,11 @@ toml.workspace = true
|
||||
tonic.workspace = true
|
||||
tracing-appender.workspace = true
|
||||
|
||||
[target.'cfg(unix)'.dependencies]
|
||||
pprof = { version = "0.14", features = [
|
||||
"flamegraph",
|
||||
] }
|
||||
|
||||
[target.'cfg(not(windows))'.dependencies]
|
||||
tikv-jemallocator = "0.6"
|
||||
|
||||
|
||||
@@ -103,12 +103,15 @@ async fn main_body() -> Result<()> {
|
||||
|
||||
async fn start(cli: Command) -> Result<()> {
|
||||
match cli.subcmd {
|
||||
SubCommand::Datanode(cmd) => {
|
||||
let opts = cmd.load_options(&cli.global_options)?;
|
||||
let plugins = Plugins::new();
|
||||
let builder = InstanceBuilder::try_new_with_init(opts, plugins).await?;
|
||||
cmd.build_with(builder).await?.run().await
|
||||
}
|
||||
SubCommand::Datanode(cmd) => match cmd.subcmd {
|
||||
datanode::SubCommand::Start(ref start) => {
|
||||
let opts = start.load_options(&cli.global_options)?;
|
||||
let plugins = Plugins::new();
|
||||
let builder = InstanceBuilder::try_new_with_init(opts, plugins).await?;
|
||||
cmd.build_with(builder).await?.run().await
|
||||
}
|
||||
datanode::SubCommand::Objbench(ref bench) => bench.run().await,
|
||||
},
|
||||
SubCommand::Flownode(cmd) => {
|
||||
cmd.build(cmd.load_options(&cli.global_options)?)
|
||||
.await?
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
// limitations under the License.
|
||||
|
||||
pub mod builder;
|
||||
#[allow(clippy::print_stdout)]
|
||||
mod objbench;
|
||||
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
@@ -23,13 +25,16 @@ use common_config::Configurable;
|
||||
use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
|
||||
use common_telemetry::{info, warn};
|
||||
use common_wal::config::DatanodeWalConfig;
|
||||
use datanode::config::RegionEngineConfig;
|
||||
use datanode::datanode::Datanode;
|
||||
use meta_client::MetaClientOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ResultExt, ensure};
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::App;
|
||||
use crate::datanode::builder::InstanceBuilder;
|
||||
use crate::datanode::objbench::ObjbenchCommand;
|
||||
use crate::error::{
|
||||
LoadLayeredConfigSnafu, MissingConfigSnafu, Result, ShutdownDatanodeSnafu, StartDatanodeSnafu,
|
||||
};
|
||||
@@ -89,7 +94,7 @@ impl App for Instance {
|
||||
#[derive(Parser)]
|
||||
pub struct Command {
|
||||
#[clap(subcommand)]
|
||||
subcmd: SubCommand,
|
||||
pub subcmd: SubCommand,
|
||||
}
|
||||
|
||||
impl Command {
|
||||
@@ -100,13 +105,26 @@ impl Command {
|
||||
pub fn load_options(&self, global_options: &GlobalOptions) -> Result<DatanodeOptions> {
|
||||
match &self.subcmd {
|
||||
SubCommand::Start(cmd) => cmd.load_options(global_options),
|
||||
SubCommand::Objbench(_) => {
|
||||
// For objbench command, we don't need to load DatanodeOptions
|
||||
// It's a standalone utility command
|
||||
let mut opts = datanode::config::DatanodeOptions::default();
|
||||
opts.sanitize();
|
||||
Ok(DatanodeOptions {
|
||||
runtime: Default::default(),
|
||||
plugins: Default::default(),
|
||||
component: opts,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
enum SubCommand {
|
||||
pub enum SubCommand {
|
||||
Start(StartCommand),
|
||||
/// Object storage benchmark tool
|
||||
Objbench(ObjbenchCommand),
|
||||
}
|
||||
|
||||
impl SubCommand {
|
||||
@@ -116,12 +134,33 @@ impl SubCommand {
|
||||
info!("Building datanode with {:#?}", cmd);
|
||||
builder.build().await
|
||||
}
|
||||
SubCommand::Objbench(cmd) => {
|
||||
cmd.run().await?;
|
||||
std::process::exit(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Storage engine config
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
|
||||
#[serde(default)]
|
||||
pub struct StorageConfig {
|
||||
/// The working directory of database
|
||||
pub data_home: String,
|
||||
#[serde(flatten)]
|
||||
pub store: object_store::config::ObjectStoreConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
|
||||
#[serde(default)]
|
||||
struct StorageConfigWrapper {
|
||||
storage: StorageConfig,
|
||||
region_engine: Vec<RegionEngineConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser, Default)]
|
||||
struct StartCommand {
|
||||
pub struct StartCommand {
|
||||
#[clap(long)]
|
||||
node_id: Option<u64>,
|
||||
/// The address to bind the gRPC server.
|
||||
@@ -149,7 +188,7 @@ struct StartCommand {
|
||||
}
|
||||
|
||||
impl StartCommand {
|
||||
fn load_options(&self, global_options: &GlobalOptions) -> Result<DatanodeOptions> {
|
||||
pub fn load_options(&self, global_options: &GlobalOptions) -> Result<DatanodeOptions> {
|
||||
let mut opts = DatanodeOptions::load_layered_options(
|
||||
self.config_file.as_deref(),
|
||||
self.env_prefix.as_ref(),
|
||||
|
||||
677 src/cmd/src/datanode/objbench.rs (new file)
@@ -0,0 +1,677 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use clap::Parser;
|
||||
use colored::Colorize;
|
||||
use datanode::config::RegionEngineConfig;
|
||||
use datanode::store;
|
||||
use either::Either;
|
||||
use mito2::access_layer::{
|
||||
AccessLayer, AccessLayerRef, Metrics, OperationType, SstWriteRequest, WriteType,
|
||||
};
|
||||
use mito2::cache::{CacheManager, CacheManagerRef};
|
||||
use mito2::config::{FulltextIndexConfig, MitoConfig, Mode};
|
||||
use mito2::read::Source;
|
||||
use mito2::sst::file::{FileHandle, FileMeta};
|
||||
use mito2::sst::file_purger::{FilePurger, FilePurgerRef};
|
||||
use mito2::sst::index::intermediate::IntermediateManager;
|
||||
use mito2::sst::index::puffin_manager::PuffinManagerFactory;
|
||||
use mito2::sst::parquet::reader::ParquetReaderBuilder;
|
||||
use mito2::sst::parquet::{PARQUET_METADATA_KEY, WriteOptions};
|
||||
use mito2::worker::write_cache_from_config;
|
||||
use object_store::ObjectStore;
|
||||
use regex::Regex;
|
||||
use snafu::OptionExt;
|
||||
use store_api::metadata::{RegionMetadata, RegionMetadataRef};
|
||||
use store_api::path_utils::region_name;
|
||||
use store_api::region_request::PathType;
|
||||
use store_api::storage::FileId;
|
||||
|
||||
use crate::datanode::{StorageConfig, StorageConfigWrapper};
|
||||
use crate::error;
|
||||
|
||||
/// Object storage benchmark command
|
||||
#[derive(Debug, Parser)]
|
||||
pub struct ObjbenchCommand {
|
||||
/// Path to the object-store config file (TOML). Must deserialize into object_store::config::ObjectStoreConfig.
|
||||
#[clap(long, value_name = "FILE")]
|
||||
pub config: PathBuf,
|
||||
|
||||
/// Source SST file path in object-store (e.g. "region_dir/<uuid>.parquet").
|
||||
#[clap(long, value_name = "PATH")]
|
||||
pub source: String,
|
||||
|
||||
/// Verbose output
|
||||
#[clap(short, long, default_value_t = false)]
|
||||
pub verbose: bool,
|
||||
|
||||
/// Output file path for pprof flamegraph (enables profiling)
|
||||
#[clap(long, value_name = "FILE")]
|
||||
pub pprof_file: Option<PathBuf>,
|
||||
}
|
||||
|
||||
fn parse_config(config_path: &PathBuf) -> error::Result<(StorageConfig, MitoConfig)> {
|
||||
let cfg_str = std::fs::read_to_string(config_path).map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("failed to read config {}: {e}", config_path.display()),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
|
||||
let store_cfg: StorageConfigWrapper = toml::from_str(&cfg_str).map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("failed to parse config {}: {e}", config_path.display()),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
|
||||
let storage_config = store_cfg.storage;
|
||||
let mito_engine_config = store_cfg
|
||||
.region_engine
|
||||
.into_iter()
|
||||
.filter_map(|c| {
|
||||
if let RegionEngineConfig::Mito(mito) = c {
|
||||
Some(mito)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.next()
|
||||
.with_context(|| error::IllegalConfigSnafu {
|
||||
msg: format!("Engine config not found in {:?}", config_path),
|
||||
})?;
|
||||
Ok((storage_config, mito_engine_config))
|
||||
}
|
||||
|
||||
impl ObjbenchCommand {
|
||||
pub async fn run(&self) -> error::Result<()> {
|
||||
if self.verbose {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
}
|
||||
|
||||
println!("{}", "Starting objbench with config:".cyan().bold());
|
||||
|
||||
// Build object store from config
|
||||
let (store_cfg, mut mito_engine_config) = parse_config(&self.config)?;
|
||||
|
||||
let object_store = build_object_store(&store_cfg).await?;
|
||||
println!("{} Object store initialized", "✓".green());
|
||||
|
||||
// Prepare source identifiers
|
||||
let components = parse_file_dir_components(&self.source)?;
|
||||
println!(
|
||||
"{} Source path parsed: {}, components: {:?}",
|
||||
"✓".green(),
|
||||
self.source,
|
||||
components
|
||||
);
|
||||
|
||||
// Load parquet metadata to extract RegionMetadata and file stats
|
||||
println!("{}", "Loading parquet metadata...".yellow());
|
||||
let file_size = object_store
|
||||
.stat(&self.source)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("stat failed: {e}"),
|
||||
}
|
||||
.build()
|
||||
})?
|
||||
.content_length();
|
||||
let parquet_meta = load_parquet_metadata(object_store.clone(), &self.source, file_size)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("read parquet metadata failed: {e}"),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
|
||||
let region_meta = extract_region_metadata(&self.source, &parquet_meta)?;
|
||||
let num_rows = parquet_meta.file_metadata().num_rows() as u64;
|
||||
let num_row_groups = parquet_meta.num_row_groups() as u64;
|
||||
|
||||
println!(
|
||||
"{} Metadata loaded - rows: {}, size: {} bytes",
|
||||
"✓".green(),
|
||||
num_rows,
|
||||
file_size
|
||||
);
|
||||
|
||||
// Build a FileHandle for the source file
|
||||
let file_meta = FileMeta {
|
||||
region_id: region_meta.region_id,
|
||||
file_id: components.file_id,
|
||||
time_range: Default::default(),
|
||||
level: 0,
|
||||
file_size,
|
||||
available_indexes: Default::default(),
|
||||
index_file_size: 0,
|
||||
index_file_id: None,
|
||||
num_rows,
|
||||
num_row_groups,
|
||||
sequence: None,
|
||||
partition_expr: None,
|
||||
num_series: 0,
|
||||
};
|
||||
let src_handle = FileHandle::new(file_meta, new_noop_file_purger());
|
||||
|
||||
// Build the reader for a single file via ParquetReaderBuilder
|
||||
let table_dir = components.table_dir();
|
||||
let (src_access_layer, cache_manager) = build_access_layer_simple(
|
||||
&components,
|
||||
object_store.clone(),
|
||||
&mut mito_engine_config,
|
||||
&store_cfg.data_home,
|
||||
)
|
||||
.await?;
|
||||
let reader_build_start = Instant::now();
|
||||
|
||||
let reader = ParquetReaderBuilder::new(
|
||||
table_dir,
|
||||
components.path_type,
|
||||
src_handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.expected_metadata(Some(region_meta.clone()))
|
||||
.build()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("build reader failed: {e:?}"),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
|
||||
let reader_build_elapsed = reader_build_start.elapsed();
|
||||
let total_rows = reader.parquet_metadata().file_metadata().num_rows();
|
||||
println!("{} Reader built in {:?}", "✓".green(), reader_build_elapsed);
|
||||
|
||||
// Build write request
|
||||
let fulltext_index_config = FulltextIndexConfig {
|
||||
create_on_compaction: Mode::Disable,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let write_req = SstWriteRequest {
|
||||
op_type: OperationType::Flush,
|
||||
metadata: region_meta,
|
||||
source: Either::Left(Source::Reader(Box::new(reader))),
|
||||
cache_manager,
|
||||
storage: None,
|
||||
max_sequence: None,
|
||||
index_options: Default::default(),
|
||||
index_config: mito_engine_config.index.clone(),
|
||||
inverted_index_config: MitoConfig::default().inverted_index,
|
||||
fulltext_index_config,
|
||||
bloom_filter_index_config: MitoConfig::default().bloom_filter_index,
|
||||
};
|
||||
|
||||
// Write SST
|
||||
println!("{}", "Writing SST...".yellow());
|
||||
|
||||
// Start profiling if pprof_file is specified
|
||||
#[cfg(unix)]
|
||||
let profiler_guard = if self.pprof_file.is_some() {
|
||||
println!("{} Starting profiling...", "⚡".yellow());
|
||||
Some(
|
||||
pprof::ProfilerGuardBuilder::default()
|
||||
.frequency(99)
|
||||
.blocklist(&["libc", "libgcc", "pthread", "vdso"])
|
||||
.build()
|
||||
.map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("Failed to start profiler: {e}"),
|
||||
}
|
||||
.build()
|
||||
})?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
#[cfg(not(unix))]
|
||||
if self.pprof_file.is_some() {
|
||||
eprintln!(
|
||||
"{}: Profiling is not supported on this platform",
|
||||
"Warning".yellow()
|
||||
);
|
||||
}
|
||||
|
||||
let write_start = Instant::now();
|
||||
let mut metrics = Metrics::new(WriteType::Flush);
|
||||
let infos = src_access_layer
|
||||
.write_sst(write_req, &WriteOptions::default(), &mut metrics)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("write_sst failed: {e:?}"),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
|
||||
let write_elapsed = write_start.elapsed();
|
||||
|
||||
// Stop profiling and generate flamegraph if enabled
|
||||
#[cfg(unix)]
|
||||
if let (Some(guard), Some(pprof_file)) = (profiler_guard, &self.pprof_file) {
|
||||
println!("{} Generating flamegraph...", "🔥".yellow());
|
||||
match guard.report().build() {
|
||||
Ok(report) => {
|
||||
let mut flamegraph_data = Vec::new();
|
||||
if let Err(e) = report.flamegraph(&mut flamegraph_data) {
|
||||
println!("{}: Failed to generate flamegraph: {}", "Error".red(), e);
|
||||
} else if let Err(e) = std::fs::write(pprof_file, flamegraph_data) {
|
||||
println!(
|
||||
"{}: Failed to write flamegraph to {}: {}",
|
||||
"Error".red(),
|
||||
pprof_file.display(),
|
||||
e
|
||||
);
|
||||
} else {
|
||||
println!(
|
||||
"{} Flamegraph saved to {}",
|
||||
"✓".green(),
|
||||
pprof_file.display().to_string().cyan()
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
println!("{}: Failed to generate pprof report: {}", "Error".red(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
assert_eq!(infos.len(), 1);
|
||||
let dst_file_id = infos[0].file_id;
|
||||
let dst_file_path = format!("{}/{}.parquet", components.region_dir(), dst_file_id);
|
||||
let mut dst_index_path = None;
|
||||
if infos[0].index_metadata.file_size > 0 {
|
||||
dst_index_path = Some(format!(
|
||||
"{}/index/{}.puffin",
|
||||
components.region_dir(),
|
||||
dst_file_id
|
||||
));
|
||||
}
|
||||
|
||||
// Report results with ANSI colors
|
||||
println!("\n{} {}", "Write complete!".green().bold(), "✓".green());
|
||||
println!(" {}: {}", "Destination file".bold(), dst_file_path.cyan());
|
||||
println!(" {}: {}", "Rows".bold(), total_rows.to_string().cyan());
|
||||
println!(
|
||||
" {}: {}",
|
||||
"File size".bold(),
|
||||
format!("{} bytes", file_size).cyan()
|
||||
);
|
||||
println!(
|
||||
" {}: {:?}",
|
||||
"Reader build time".bold(),
|
||||
reader_build_elapsed
|
||||
);
|
||||
println!(" {}: {:?}", "Total time".bold(), write_elapsed);
|
||||
|
||||
// Print metrics in a formatted way
|
||||
println!(" {}: {:?}", "Metrics".bold(), metrics,);
|
||||
|
||||
// Print infos
|
||||
println!(" {}: {:?}", "Index".bold(), infos[0].index_metadata);
|
||||
|
||||
// Cleanup
|
||||
println!("\n{}", "Cleaning up...".yellow());
|
||||
object_store.delete(&dst_file_path).await.map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("Failed to delete dest file {}: {}", dst_file_path, e),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
println!("{} Temporary file {} deleted", "✓".green(), dst_file_path);
|
||||
|
||||
if let Some(index_path) = dst_index_path {
|
||||
object_store.delete(&index_path).await.map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("Failed to delete dest index file {}: {}", index_path, e),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
println!(
|
||||
"{} Temporary index file {} deleted",
|
||||
"✓".green(),
|
||||
index_path
|
||||
);
|
||||
}
|
||||
|
||||
println!("\n{}", "Benchmark completed successfully!".green().bold());
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct FileDirComponents {
|
||||
catalog: String,
|
||||
schema: String,
|
||||
table_id: u32,
|
||||
region_sequence: u32,
|
||||
path_type: PathType,
|
||||
file_id: FileId,
|
||||
}
|
||||
|
||||
impl FileDirComponents {
|
||||
fn table_dir(&self) -> String {
|
||||
format!("data/{}/{}/{}", self.catalog, self.schema, self.table_id)
|
||||
}
|
||||
|
||||
fn region_dir(&self) -> String {
|
||||
let region_name = region_name(self.table_id, self.region_sequence);
|
||||
match self.path_type {
|
||||
PathType::Bare => {
|
||||
format!(
|
||||
"data/{}/{}/{}/{}",
|
||||
self.catalog, self.schema, self.table_id, region_name
|
||||
)
|
||||
}
|
||||
PathType::Data => {
|
||||
format!(
|
||||
"data/{}/{}/{}/{}/data",
|
||||
self.catalog, self.schema, self.table_id, region_name
|
||||
)
|
||||
}
|
||||
PathType::Metadata => {
|
||||
format!(
|
||||
"data/{}/{}/{}/{}/metadata",
|
||||
self.catalog, self.schema, self.table_id, region_name
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_file_dir_components(path: &str) -> error::Result<FileDirComponents> {
|
||||
// Define the regex pattern to match all three path styles
|
||||
let pattern =
|
||||
r"^data/([^/]+)/([^/]+)/([^/]+)/([^/]+)_([^/]+)(?:/data|/metadata)?/(.+).parquet$";
|
||||
|
||||
// Compile the regex
|
||||
let re = Regex::new(pattern).expect("Invalid regex pattern");
|
||||
|
||||
// Determine the path type
|
||||
let path_type = if path.contains("/data/") {
|
||||
PathType::Data
|
||||
} else if path.contains("/metadata/") {
|
||||
PathType::Metadata
|
||||
} else {
|
||||
PathType::Bare
|
||||
};
|
||||
|
||||
// Try to match the path
|
||||
let components = (|| {
|
||||
let captures = re.captures(path)?;
|
||||
if captures.len() != 7 {
|
||||
return None;
|
||||
}
|
||||
let mut components = FileDirComponents {
|
||||
catalog: "".to_string(),
|
||||
schema: "".to_string(),
|
||||
table_id: 0,
|
||||
region_sequence: 0,
|
||||
path_type,
|
||||
file_id: FileId::default(),
|
||||
};
|
||||
// Extract the components
|
||||
components.catalog = captures.get(1)?.as_str().to_string();
|
||||
components.schema = captures.get(2)?.as_str().to_string();
|
||||
components.table_id = captures[3].parse().ok()?;
|
||||
components.region_sequence = captures[5].parse().ok()?;
|
||||
let file_id_str = &captures[6];
|
||||
components.file_id = FileId::parse_str(file_id_str).ok()?;
|
||||
Some(components)
|
||||
})();
|
||||
components.context(error::IllegalConfigSnafu {
|
||||
msg: format!("Expect valid source file path, got: {}", path),
|
||||
})
|
||||
}
|
||||
|
||||
fn extract_region_metadata(
|
||||
file_path: &str,
|
||||
meta: &parquet::file::metadata::ParquetMetaData,
|
||||
) -> error::Result<RegionMetadataRef> {
|
||||
use parquet::format::KeyValue;
|
||||
let kvs: Option<&Vec<KeyValue>> = meta.file_metadata().key_value_metadata();
|
||||
let Some(kvs) = kvs else {
|
||||
return Err(error::IllegalConfigSnafu {
|
||||
msg: format!("{file_path}: missing parquet key_value metadata"),
|
||||
}
|
||||
.build());
|
||||
};
|
||||
let json = kvs
|
||||
.iter()
|
||||
.find(|kv| kv.key == PARQUET_METADATA_KEY)
|
||||
.and_then(|kv| kv.value.as_ref())
|
||||
.ok_or_else(|| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("{file_path}: key {PARQUET_METADATA_KEY} not found or empty"),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
let region: RegionMetadata = RegionMetadata::from_json(json).map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("invalid region metadata json: {e}"),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
Ok(Arc::new(region))
|
||||
}
|
||||
|
||||
async fn build_object_store(sc: &StorageConfig) -> error::Result<ObjectStore> {
|
||||
store::new_object_store(sc.store.clone(), &sc.data_home)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("Failed to build object store: {e:?}"),
|
||||
}
|
||||
.build()
|
||||
})
|
||||
}
|
||||
|
||||
async fn build_access_layer_simple(
|
||||
components: &FileDirComponents,
|
||||
object_store: ObjectStore,
|
||||
config: &mut MitoConfig,
|
||||
data_home: &str,
|
||||
) -> error::Result<(AccessLayerRef, CacheManagerRef)> {
|
||||
let _ = config.index.sanitize(data_home, &config.inverted_index);
|
||||
let puffin_manager = PuffinManagerFactory::new(
|
||||
&config.index.aux_path,
|
||||
config.index.staging_size.as_bytes(),
|
||||
Some(config.index.write_buffer_size.as_bytes() as _),
|
||||
config.index.staging_ttl,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("Failed to build access layer: {e:?}"),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
|
||||
let intermediate_manager = IntermediateManager::init_fs(&config.index.aux_path)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("Failed to build IntermediateManager: {e:?}"),
|
||||
}
|
||||
.build()
|
||||
})?
|
||||
.with_buffer_size(Some(config.index.write_buffer_size.as_bytes() as _));
|
||||
|
||||
let cache_manager =
|
||||
build_cache_manager(config, puffin_manager.clone(), intermediate_manager.clone()).await?;
|
||||
let layer = AccessLayer::new(
|
||||
components.table_dir(),
|
||||
components.path_type,
|
||||
object_store,
|
||||
puffin_manager,
|
||||
intermediate_manager,
|
||||
);
|
||||
Ok((Arc::new(layer), cache_manager))
|
||||
}
|
||||
|
||||
async fn build_cache_manager(
|
||||
config: &MitoConfig,
|
||||
puffin_manager: PuffinManagerFactory,
|
||||
intermediate_manager: IntermediateManager,
|
||||
) -> error::Result<CacheManagerRef> {
|
||||
let write_cache = write_cache_from_config(config, puffin_manager, intermediate_manager)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error::IllegalConfigSnafu {
|
||||
msg: format!("Failed to build write cache: {e:?}"),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
let cache_manager = Arc::new(
|
||||
CacheManager::builder()
|
||||
.sst_meta_cache_size(config.sst_meta_cache_size.as_bytes())
|
||||
.vector_cache_size(config.vector_cache_size.as_bytes())
|
||||
.page_cache_size(config.page_cache_size.as_bytes())
|
||||
.selector_result_cache_size(config.selector_result_cache_size.as_bytes())
|
||||
.index_metadata_size(config.index.metadata_cache_size.as_bytes())
|
||||
.index_content_size(config.index.content_cache_size.as_bytes())
|
||||
.index_content_page_size(config.index.content_cache_page_size.as_bytes())
|
||||
.index_result_cache_size(config.index.result_cache_size.as_bytes())
|
||||
.puffin_metadata_size(config.index.metadata_cache_size.as_bytes())
|
||||
.write_cache(write_cache)
|
||||
.build(),
|
||||
);
|
||||
Ok(cache_manager)
|
||||
}
|
||||
|
||||
fn new_noop_file_purger() -> FilePurgerRef {
|
||||
#[derive(Debug)]
|
||||
struct Noop;
|
||||
impl FilePurger for Noop {
|
||||
fn remove_file(&self, _file_meta: FileMeta, _is_delete: bool) {}
|
||||
}
|
||||
Arc::new(Noop)
|
||||
}
|
||||
|
||||
async fn load_parquet_metadata(
|
||||
object_store: ObjectStore,
|
||||
path: &str,
|
||||
file_size: u64,
|
||||
) -> Result<parquet::file::metadata::ParquetMetaData, Box<dyn std::error::Error + Send + Sync>> {
|
||||
use parquet::file::FOOTER_SIZE;
|
||||
use parquet::file::metadata::ParquetMetaDataReader;
|
||||
let actual_size = if file_size == 0 {
|
||||
object_store.stat(path).await?.content_length()
|
||||
} else {
|
||||
file_size
|
||||
};
|
||||
if actual_size < FOOTER_SIZE as u64 {
|
||||
return Err("file too small".into());
|
||||
}
|
||||
let prefetch: u64 = 64 * 1024;
|
||||
let start = actual_size.saturating_sub(prefetch);
|
||||
let buffer = object_store
|
||||
.read_with(path)
|
||||
.range(start..actual_size)
|
||||
.await?
|
||||
.to_vec();
|
||||
let buffer_len = buffer.len();
|
||||
let mut footer = [0; 8];
|
||||
footer.copy_from_slice(&buffer[buffer_len - FOOTER_SIZE..]);
|
||||
let footer = ParquetMetaDataReader::decode_footer_tail(&footer)?;
|
||||
let metadata_len = footer.metadata_length() as u64;
|
||||
if actual_size - (FOOTER_SIZE as u64) < metadata_len {
|
||||
return Err("invalid footer/metadata length".into());
|
||||
}
|
||||
if (metadata_len as usize) <= buffer_len - FOOTER_SIZE {
|
||||
let metadata_start = buffer_len - metadata_len as usize - FOOTER_SIZE;
|
||||
let meta = ParquetMetaDataReader::decode_metadata(
|
||||
&buffer[metadata_start..buffer_len - FOOTER_SIZE],
|
||||
)?;
|
||||
Ok(meta)
|
||||
} else {
|
||||
let metadata_start = actual_size - metadata_len - FOOTER_SIZE as u64;
|
||||
let data = object_store
|
||||
.read_with(path)
|
||||
.range(metadata_start..(actual_size - FOOTER_SIZE as u64))
|
||||
.await?
|
||||
.to_vec();
|
||||
let meta = ParquetMetaDataReader::decode_metadata(&data)?;
|
||||
Ok(meta)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use store_api::region_request::PathType;
|
||||
|
||||
use crate::datanode::objbench::{parse_config, parse_file_dir_components};
|
||||
|
||||
#[test]
|
||||
fn test_parse_dir() {
|
||||
let meta_path = "data/greptime/public/1024/1024_0000000000/metadata/00020380-009c-426d-953e-b4e34c15af34.parquet";
|
||||
let c = parse_file_dir_components(meta_path).unwrap();
|
||||
assert_eq!(
|
||||
c.file_id.to_string(),
|
||||
"00020380-009c-426d-953e-b4e34c15af34"
|
||||
);
|
||||
assert_eq!(c.catalog, "greptime");
|
||||
assert_eq!(c.schema, "public");
|
||||
assert_eq!(c.table_id, 1024);
|
||||
assert_eq!(c.region_sequence, 0);
|
||||
assert_eq!(c.path_type, PathType::Metadata);
|
||||
|
||||
let c = parse_file_dir_components(
|
||||
"data/greptime/public/1024/1024_0000000000/data/00020380-009c-426d-953e-b4e34c15af34.parquet",
|
||||
).unwrap();
|
||||
assert_eq!(
|
||||
c.file_id.to_string(),
|
||||
"00020380-009c-426d-953e-b4e34c15af34"
|
||||
);
|
||||
assert_eq!(c.catalog, "greptime");
|
||||
assert_eq!(c.schema, "public");
|
||||
assert_eq!(c.table_id, 1024);
|
||||
assert_eq!(c.region_sequence, 0);
|
||||
assert_eq!(c.path_type, PathType::Data);
|
||||
|
||||
let c = parse_file_dir_components(
|
||||
"data/greptime/public/1024/1024_0000000000/00020380-009c-426d-953e-b4e34c15af34.parquet",
|
||||
).unwrap();
|
||||
assert_eq!(
|
||||
c.file_id.to_string(),
|
||||
"00020380-009c-426d-953e-b4e34c15af34"
|
||||
);
|
||||
assert_eq!(c.catalog, "greptime");
|
||||
assert_eq!(c.schema, "public");
|
||||
assert_eq!(c.table_id, 1024);
|
||||
assert_eq!(c.region_sequence, 0);
|
||||
assert_eq!(c.path_type, PathType::Bare);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_config() {
|
||||
let path = "../../config/datanode.example.toml";
|
||||
let (storage, engine) = parse_config(&PathBuf::from_str(path).unwrap()).unwrap();
|
||||
assert_eq!(storage.data_home, "./greptimedb_data");
|
||||
assert_eq!(engine.index.staging_size, ReadableSize::gb(2));
|
||||
}
|
||||
}
|
||||
@@ -177,6 +177,8 @@ pub struct StartCommand {
|
||||
#[clap(long)]
|
||||
tls_key_path: Option<String>,
|
||||
#[clap(long)]
|
||||
tls_watch: bool,
|
||||
#[clap(long)]
|
||||
user_provider: Option<String>,
|
||||
#[clap(long)]
|
||||
disable_dashboard: Option<bool>,
|
||||
@@ -230,6 +232,7 @@ impl StartCommand {
|
||||
self.tls_mode.clone(),
|
||||
self.tls_cert_path.clone(),
|
||||
self.tls_key_path.clone(),
|
||||
self.tls_watch,
|
||||
);
|
||||
|
||||
if let Some(addr) = &self.http_addr {
|
||||
|
||||
@@ -228,6 +228,8 @@ pub struct StartCommand {
|
||||
#[clap(long)]
|
||||
tls_key_path: Option<String>,
|
||||
#[clap(long)]
|
||||
tls_watch: bool,
|
||||
#[clap(long)]
|
||||
user_provider: Option<String>,
|
||||
#[clap(long, default_value = "GREPTIMEDB_STANDALONE")]
|
||||
pub env_prefix: String,
|
||||
@@ -277,6 +279,7 @@ impl StartCommand {
|
||||
self.tls_mode.clone(),
|
||||
self.tls_cert_path.clone(),
|
||||
self.tls_key_path.clone(),
|
||||
self.tls_watch,
|
||||
);
|
||||
|
||||
if let Some(addr) = &self.http_addr {
|
||||
@@ -769,6 +772,9 @@ mod tests {
|
||||
fn test_load_log_options_from_cli() {
|
||||
let cmd = StartCommand {
|
||||
user_provider: Some("static_user_provider:cmd:test=test".to_string()),
|
||||
mysql_addr: Some("127.0.0.1:4002".to_string()),
|
||||
postgres_addr: Some("127.0.0.1:4003".to_string()),
|
||||
tls_watch: true,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -785,6 +791,8 @@ mod tests {
|
||||
|
||||
assert_eq!("./greptimedb_data/test/logs", opts.logging.dir);
|
||||
assert_eq!("debug", opts.logging.level.unwrap());
|
||||
assert!(opts.mysql.tls.watch);
|
||||
assert!(opts.postgres.tls.watch);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use cmd::options::GreptimeOptions;
|
||||
use common_base::memory_limit::MemoryLimit;
|
||||
use common_config::{Configurable, DEFAULT_DATA_HOME};
|
||||
use common_options::datanode::{ClientOptions, DatanodeClientOptions};
|
||||
use common_telemetry::logging::{DEFAULT_LOGGING_DIR, DEFAULT_OTLP_HTTP_ENDPOINT, LoggingOptions};
|
||||
@@ -74,14 +75,19 @@ fn test_load_datanode_example_config() {
|
||||
RegionEngineConfig::Mito(MitoConfig {
|
||||
auto_flush_interval: Duration::from_secs(3600),
|
||||
write_cache_ttl: Some(Duration::from_secs(60 * 60 * 8)),
|
||||
scan_memory_limit: MemoryLimit::Percentage(50),
|
||||
..Default::default()
|
||||
}),
|
||||
RegionEngineConfig::File(FileEngineConfig {}),
|
||||
RegionEngineConfig::Metric(MetricEngineConfig {
|
||||
experimental_sparse_primary_key_encoding: false,
|
||||
sparse_primary_key_encoding: true,
|
||||
flush_metadata_region_interval: Duration::from_secs(30),
|
||||
}),
|
||||
],
|
||||
query: QueryOptions {
|
||||
memory_pool_size: MemoryLimit::Percentage(50),
|
||||
..Default::default()
|
||||
},
|
||||
logging: LoggingOptions {
|
||||
level: Some("info".to_string()),
|
||||
dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
|
||||
@@ -155,6 +161,10 @@ fn test_load_frontend_example_config() {
|
||||
cors_allowed_origins: vec!["https://example.com".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
query: QueryOptions {
|
||||
memory_pool_size: MemoryLimit::Percentage(50),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
@@ -242,6 +252,7 @@ fn test_load_flownode_example_config() {
|
||||
query: QueryOptions {
|
||||
parallelism: 1,
|
||||
allow_query_fallback: false,
|
||||
memory_pool_size: MemoryLimit::Percentage(50),
|
||||
},
|
||||
meta_client: Some(MetaClientOptions {
|
||||
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
|
||||
@@ -286,11 +297,12 @@ fn test_load_standalone_example_config() {
|
||||
RegionEngineConfig::Mito(MitoConfig {
|
||||
auto_flush_interval: Duration::from_secs(3600),
|
||||
write_cache_ttl: Some(Duration::from_secs(60 * 60 * 8)),
|
||||
scan_memory_limit: MemoryLimit::Percentage(50),
|
||||
..Default::default()
|
||||
}),
|
||||
RegionEngineConfig::File(FileEngineConfig {}),
|
||||
RegionEngineConfig::Metric(MetricEngineConfig {
|
||||
experimental_sparse_primary_key_encoding: false,
|
||||
sparse_primary_key_encoding: true,
|
||||
flush_metadata_region_interval: Duration::from_secs(30),
|
||||
}),
|
||||
],
|
||||
@@ -314,7 +326,10 @@ fn test_load_standalone_example_config() {
|
||||
cors_allowed_origins: vec!["https://example.com".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
|
||||
query: QueryOptions {
|
||||
memory_pool_size: MemoryLimit::Percentage(50),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
pub mod bit_vec;
|
||||
pub mod bytes;
|
||||
pub mod cancellation;
|
||||
pub mod memory_limit;
|
||||
pub mod plugins;
|
||||
pub mod range_read;
|
||||
#[allow(clippy::all)]
|
||||
|
||||
src/common/base/src/memory_limit.rs (new file, 265 lines)
@@ -0,0 +1,265 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
use std::str::FromStr;
|
||||
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
use crate::readable_size::ReadableSize;
|
||||
|
||||
/// Memory limit configuration that supports both absolute size and percentage.
|
||||
///
|
||||
/// Examples:
|
||||
/// - Absolute size: "2GB", "4GiB", "512MB"
|
||||
/// - Percentage: "50%", "75%"
|
||||
/// - Unlimited: "unlimited", "0"
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
||||
pub enum MemoryLimit {
|
||||
/// Absolute memory size.
|
||||
Size(ReadableSize),
|
||||
/// Percentage of total system memory (0-100).
|
||||
Percentage(u8),
|
||||
/// No memory limit.
|
||||
#[default]
|
||||
Unlimited,
|
||||
}
|
||||
|
||||
impl MemoryLimit {
|
||||
/// Resolve the memory limit to bytes based on total system memory.
|
||||
/// Returns 0 if the limit is unlimited.
|
||||
pub fn resolve(&self, total_memory_bytes: u64) -> u64 {
|
||||
match self {
|
||||
MemoryLimit::Size(size) => size.as_bytes(),
|
||||
MemoryLimit::Percentage(pct) => total_memory_bytes * (*pct as u64) / 100,
|
||||
MemoryLimit::Unlimited => 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if this limit is unlimited.
|
||||
pub fn is_unlimited(&self) -> bool {
|
||||
match self {
|
||||
MemoryLimit::Size(size) => size.as_bytes() == 0,
|
||||
MemoryLimit::Percentage(pct) => *pct == 0,
|
||||
MemoryLimit::Unlimited => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for MemoryLimit {
|
||||
type Err = String;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let s = s.trim();
|
||||
|
||||
if s.eq_ignore_ascii_case("unlimited") {
|
||||
return Ok(MemoryLimit::Unlimited);
|
||||
}
|
||||
|
||||
if let Some(pct_str) = s.strip_suffix('%') {
|
||||
let pct = pct_str
|
||||
.trim()
|
||||
.parse::<u8>()
|
||||
.map_err(|e| format!("invalid percentage value '{}': {}", pct_str, e))?;
|
||||
|
||||
if pct > 100 {
|
||||
return Err(format!("percentage must be between 0 and 100, got {}", pct));
|
||||
}
|
||||
|
||||
if pct == 0 {
|
||||
Ok(MemoryLimit::Unlimited)
|
||||
} else {
|
||||
Ok(MemoryLimit::Percentage(pct))
|
||||
}
|
||||
} else {
|
||||
let size = ReadableSize::from_str(s)?;
|
||||
if size.as_bytes() == 0 {
|
||||
Ok(MemoryLimit::Unlimited)
|
||||
} else {
|
||||
Ok(MemoryLimit::Size(size))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for MemoryLimit {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
MemoryLimit::Size(size) => write!(f, "{}", size),
|
||||
MemoryLimit::Percentage(pct) => write!(f, "{}%", pct),
|
||||
MemoryLimit::Unlimited => write!(f, "unlimited"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for MemoryLimit {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
serializer.serialize_str(&self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for MemoryLimit {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let s = String::deserialize(deserializer)?;
|
||||
MemoryLimit::from_str(&s).map_err(serde::de::Error::custom)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_parse_absolute_size() {
|
||||
assert_eq!(
|
||||
"2GB".parse::<MemoryLimit>().unwrap(),
|
||||
MemoryLimit::Size(ReadableSize(2 * 1024 * 1024 * 1024))
|
||||
);
|
||||
assert_eq!(
|
||||
"512MB".parse::<MemoryLimit>().unwrap(),
|
||||
MemoryLimit::Size(ReadableSize(512 * 1024 * 1024))
|
||||
);
|
||||
assert_eq!("0".parse::<MemoryLimit>().unwrap(), MemoryLimit::Unlimited);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_percentage() {
|
||||
assert_eq!(
|
||||
"50%".parse::<MemoryLimit>().unwrap(),
|
||||
MemoryLimit::Percentage(50)
|
||||
);
|
||||
assert_eq!(
|
||||
"75%".parse::<MemoryLimit>().unwrap(),
|
||||
MemoryLimit::Percentage(75)
|
||||
);
|
||||
assert_eq!("0%".parse::<MemoryLimit>().unwrap(), MemoryLimit::Unlimited);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_invalid() {
|
||||
assert!("150%".parse::<MemoryLimit>().is_err());
|
||||
assert!("-10%".parse::<MemoryLimit>().is_err());
|
||||
assert!("invalid".parse::<MemoryLimit>().is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve() {
|
||||
let total = 8 * 1024 * 1024 * 1024; // 8GB
|
||||
|
||||
assert_eq!(
|
||||
MemoryLimit::Size(ReadableSize(2 * 1024 * 1024 * 1024)).resolve(total),
|
||||
2 * 1024 * 1024 * 1024
|
||||
);
|
||||
assert_eq!(
|
||||
MemoryLimit::Percentage(50).resolve(total),
|
||||
4 * 1024 * 1024 * 1024
|
||||
);
|
||||
assert_eq!(MemoryLimit::Unlimited.resolve(total), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_unlimited() {
|
||||
assert!(MemoryLimit::Unlimited.is_unlimited());
|
||||
assert!(!MemoryLimit::Size(ReadableSize(1024)).is_unlimited());
|
||||
assert!(!MemoryLimit::Percentage(50).is_unlimited());
|
||||
assert!(!MemoryLimit::Percentage(1).is_unlimited());
|
||||
|
||||
// Defensive: these states shouldn't exist via public API, but check anyway
|
||||
assert!(MemoryLimit::Size(ReadableSize(0)).is_unlimited());
|
||||
assert!(MemoryLimit::Percentage(0).is_unlimited());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_100_percent() {
|
||||
assert_eq!(
|
||||
"100%".parse::<MemoryLimit>().unwrap(),
|
||||
MemoryLimit::Percentage(100)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_display_percentage() {
|
||||
assert_eq!(MemoryLimit::Percentage(20).to_string(), "20%");
|
||||
assert_eq!(MemoryLimit::Percentage(50).to_string(), "50%");
|
||||
assert_eq!(MemoryLimit::Percentage(100).to_string(), "100%");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_unlimited() {
|
||||
assert_eq!(
|
||||
"unlimited".parse::<MemoryLimit>().unwrap(),
|
||||
MemoryLimit::Unlimited
|
||||
);
|
||||
assert_eq!(
|
||||
"UNLIMITED".parse::<MemoryLimit>().unwrap(),
|
||||
MemoryLimit::Unlimited
|
||||
);
|
||||
assert_eq!(
|
||||
"Unlimited".parse::<MemoryLimit>().unwrap(),
|
||||
MemoryLimit::Unlimited
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_display_unlimited() {
|
||||
assert_eq!(MemoryLimit::Unlimited.to_string(), "unlimited");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_display_roundtrip() {
|
||||
let cases = vec![
|
||||
"50%",
|
||||
"100%",
|
||||
"1%",
|
||||
"2GB",
|
||||
"512MB",
|
||||
"unlimited",
|
||||
"UNLIMITED",
|
||||
"0", // normalized to unlimited
|
||||
"0%", // normalized to unlimited
|
||||
];
|
||||
|
||||
for input in cases {
|
||||
let parsed = input.parse::<MemoryLimit>().unwrap();
|
||||
let displayed = parsed.to_string();
|
||||
let reparsed = displayed.parse::<MemoryLimit>().unwrap();
|
||||
assert_eq!(
|
||||
parsed, reparsed,
|
||||
"round-trip failed: '{}' -> '{}' -> '{:?}'",
|
||||
input, displayed, reparsed
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_zero_normalization() {
|
||||
// All forms of zero should normalize to Unlimited
|
||||
assert_eq!("0".parse::<MemoryLimit>().unwrap(), MemoryLimit::Unlimited);
|
||||
assert_eq!("0%".parse::<MemoryLimit>().unwrap(), MemoryLimit::Unlimited);
|
||||
assert_eq!("0B".parse::<MemoryLimit>().unwrap(), MemoryLimit::Unlimited);
|
||||
assert_eq!(
|
||||
"0KB".parse::<MemoryLimit>().unwrap(),
|
||||
MemoryLimit::Unlimited
|
||||
);
|
||||
|
||||
// Unlimited always displays as "unlimited"
|
||||
assert_eq!(MemoryLimit::Unlimited.to_string(), "unlimited");
|
||||
}
|
||||
}
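// Illustrative usage sketch (not part of the diff): `MemoryLimit` parses from
// the same strings accepted in config files and resolves against the total
// system memory; "0", "0%" and "unlimited" all normalize to `Unlimited`.
fn example_memory_limit() {
    use std::str::FromStr;

    let limit = MemoryLimit::from_str("50%").unwrap();
    let total = 8 * 1024 * 1024 * 1024u64; // assume 8 GiB of system memory
    assert_eq!(limit.resolve(total), 4 * 1024 * 1024 * 1024);

    assert!(MemoryLimit::from_str("unlimited").unwrap().is_unlimited());
    assert_eq!(MemoryLimit::from_str("0%").unwrap(), MemoryLimit::Unlimited);
}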
|
||||
@@ -8,5 +8,6 @@ license.workspace = true
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
const_format.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
src/common/catalog/build.rs (new file, 27 lines)
@@ -0,0 +1,27 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
fn main() {
|
||||
// Set DEFAULT_CATALOG_NAME from environment variable or use default value
|
||||
let default_catalog_name =
|
||||
std::env::var("DEFAULT_CATALOG_NAME").unwrap_or_else(|_| "greptime".to_string());
|
||||
|
||||
println!(
|
||||
"cargo:rustc-env=DEFAULT_CATALOG_NAME={}",
|
||||
default_catalog_name
|
||||
);
|
||||
|
||||
// Rerun build script if the environment variable changes
|
||||
println!("cargo:rerun-if-env-changed=DEFAULT_CATALOG_NAME");
|
||||
}
|
||||
@@ -12,13 +12,15 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use const_format::concatcp;
|
||||
|
||||
pub const SYSTEM_CATALOG_NAME: &str = "system";
|
||||
pub const INFORMATION_SCHEMA_NAME: &str = "information_schema";
|
||||
pub const PG_CATALOG_NAME: &str = "pg_catalog";
|
||||
pub const SYSTEM_CATALOG_TABLE_NAME: &str = "system_catalog";
|
||||
pub const DEFAULT_CATALOG_NAME: &str = "greptime";
|
||||
pub const DEFAULT_CATALOG_NAME: &str = env!("DEFAULT_CATALOG_NAME");
|
||||
pub const DEFAULT_SCHEMA_NAME: &str = "public";
|
||||
pub const DEFAULT_PRIVATE_SCHEMA_NAME: &str = "greptime_private";
|
||||
pub const DEFAULT_PRIVATE_SCHEMA_NAME: &str = concatcp!(DEFAULT_CATALOG_NAME, "_private");
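// Illustrative sketch (not part of the diff): `concatcp!` concatenates string
// constants at compile time, so with the default catalog name the private
// schema constant evaluates to "greptime_private".
const EXAMPLE_PRIVATE_SCHEMA: &str = const_format::concatcp!("greptime", "_private");

#[test]
fn example_private_schema_name() {
    assert_eq!(EXAMPLE_PRIVATE_SCHEMA, "greptime_private");
}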
|
||||
|
||||
/// Reserves [0,MIN_USER_FLOW_ID) for internal usage.
|
||||
/// User defined table id starts from this value.
|
||||
|
||||
@@ -45,3 +45,19 @@ pub fn from_err_code_msg_to_header(code: u32, msg: &str) -> HeaderMap {
|
||||
header.insert(GREPTIME_DB_HEADER_ERROR_MSG, msg);
|
||||
header
|
||||
}
|
||||
|
||||
/// Returns the external root cause of the source error (exclude the current error).
|
||||
pub fn root_source(err: &dyn std::error::Error) -> Option<&dyn std::error::Error> {
|
||||
// There are some divergence about the behavior of the `sources()` API
|
||||
// in https://github.com/rust-lang/rust/issues/58520
|
||||
// So this function iterates the sources manually.
|
||||
let mut root = err.source();
|
||||
while let Some(r) = root {
|
||||
if let Some(s) = r.source() {
|
||||
root = Some(s);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
root
|
||||
}
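// Illustrative sketch (not part of the diff): `root_source` walks the
// `source()` chain and returns the innermost cause, or `None` when the error
// has no source at all.
#[test]
fn example_root_source() {
    use std::fmt;

    #[derive(Debug)]
    struct Wrapper(std::io::Error);

    impl fmt::Display for Wrapper {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            write!(f, "wrapper")
        }
    }

    impl std::error::Error for Wrapper {
        fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
            Some(&self.0)
        }
    }

    let err = Wrapper(std::io::Error::new(std::io::ErrorKind::Other, "io failure"));
    assert_eq!(root_source(&err).unwrap().to_string(), "io failure");
    assert!(root_source(&err.0).is_none());
}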
|
||||
|
||||
@@ -104,7 +104,7 @@ impl MetaClientSelector {
|
||||
let cfg = ChannelConfig::new()
|
||||
.connect_timeout(Duration::from_secs(30))
|
||||
.timeout(Duration::from_secs(30));
|
||||
let channel_manager = ChannelManager::with_config(cfg);
|
||||
let channel_manager = ChannelManager::with_config(cfg, None);
|
||||
Self {
|
||||
meta_client,
|
||||
channel_manager,
|
||||
|
||||
@@ -12,10 +12,12 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::aggrs::vector::avg::VectorAvg;
|
||||
use crate::aggrs::vector::product::VectorProduct;
|
||||
use crate::aggrs::vector::sum::VectorSum;
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
mod avg;
|
||||
mod product;
|
||||
mod sum;
|
||||
|
||||
@@ -25,5 +27,6 @@ impl VectorFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register_aggr(VectorSum::uadf_impl());
|
||||
registry.register_aggr(VectorProduct::uadf_impl());
|
||||
registry.register_aggr(VectorAvg::uadf_impl());
|
||||
}
|
||||
}
|
||||
|
||||
src/common/function/src/aggrs/vector/avg.rs (new file, 270 lines)
@@ -0,0 +1,270 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, LargeStringArray, StringArray};
|
||||
use arrow::compute::sum;
|
||||
use arrow::datatypes::UInt64Type;
|
||||
use arrow_schema::{DataType, Field};
|
||||
use datafusion_common::{Result, ScalarValue};
|
||||
use datafusion_expr::{
|
||||
Accumulator, AggregateUDF, Signature, SimpleAggregateUDF, TypeSignature, Volatility,
|
||||
};
|
||||
use datafusion_functions_aggregate_common::accumulator::AccumulatorArgs;
|
||||
use nalgebra::{Const, DVector, DVectorView, Dyn, OVector};
|
||||
|
||||
use crate::scalars::vector::impl_conv::{
|
||||
binlit_as_veclit, parse_veclit_from_strlit, veclit_to_binlit,
|
||||
};
|
||||
|
||||
/// The accumulator for the `vec_avg` aggregate function.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct VectorAvg {
|
||||
sum: Option<OVector<f32, Dyn>>,
|
||||
count: u64,
|
||||
}
|
||||
|
||||
impl VectorAvg {
|
||||
/// Create a new `AggregateUDF` for the `vec_avg` aggregate function.
|
||||
pub fn uadf_impl() -> AggregateUDF {
|
||||
let signature = Signature::one_of(
|
||||
vec![
|
||||
TypeSignature::Exact(vec![DataType::Utf8]),
|
||||
TypeSignature::Exact(vec![DataType::LargeUtf8]),
|
||||
TypeSignature::Exact(vec![DataType::Binary]),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
);
|
||||
let udaf = SimpleAggregateUDF::new_with_signature(
|
||||
"vec_avg",
|
||||
signature,
|
||||
DataType::Binary,
|
||||
Arc::new(Self::accumulator),
|
||||
vec![
|
||||
Arc::new(Field::new("sum", DataType::Binary, true)),
|
||||
Arc::new(Field::new("count", DataType::UInt64, true)),
|
||||
],
|
||||
);
|
||||
AggregateUDF::from(udaf)
|
||||
}
|
||||
|
||||
fn accumulator(args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
|
||||
if args.schema.fields().len() != 1 {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"expect creating `VEC_AVG` with only one input field, actual {}",
|
||||
args.schema.fields().len()
|
||||
)));
|
||||
}
|
||||
|
||||
let t = args.schema.field(0).data_type();
|
||||
if !matches!(t, DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary) {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"unexpected input datatype {t} when creating `VEC_AVG`"
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(Box::new(VectorAvg::default()))
|
||||
}
|
||||
|
||||
fn inner(&mut self, len: usize) -> &mut OVector<f32, Dyn> {
|
||||
self.sum
|
||||
.get_or_insert_with(|| OVector::zeros_generic(Dyn(len), Const::<1>))
|
||||
}
|
||||
|
||||
fn update(&mut self, values: &[ArrayRef], is_update: bool) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let vectors = match values[0].data_type() {
|
||||
DataType::Utf8 => {
|
||||
let arr: &StringArray = values[0].as_string();
|
||||
arr.iter()
|
||||
.filter_map(|x| x.map(|s| parse_veclit_from_strlit(s).map_err(Into::into)))
|
||||
.map(|x| x.map(Cow::Owned))
|
||||
.collect::<Result<Vec<_>>>()?
|
||||
}
|
||||
DataType::LargeUtf8 => {
|
||||
let arr: &LargeStringArray = values[0].as_string();
|
||||
arr.iter()
|
||||
.filter_map(|x| x.map(|s| parse_veclit_from_strlit(s).map_err(Into::into)))
|
||||
.map(|x: Result<Vec<f32>>| x.map(Cow::Owned))
|
||||
.collect::<Result<Vec<_>>>()?
|
||||
}
|
||||
DataType::Binary => {
|
||||
let arr: &BinaryArray = values[0].as_binary();
|
||||
arr.iter()
|
||||
.filter_map(|x| x.map(|b| binlit_as_veclit(b).map_err(Into::into)))
|
||||
.collect::<Result<Vec<_>>>()?
|
||||
}
|
||||
_ => {
|
||||
return Err(datafusion_common::DataFusionError::NotImplemented(format!(
|
||||
"unsupported data type {} for `VEC_AVG`",
|
||||
values[0].data_type()
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
if vectors.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let len = if is_update {
|
||||
vectors.len() as u64
|
||||
} else {
|
||||
sum(values[1].as_primitive::<UInt64Type>()).unwrap_or_default()
|
||||
};
|
||||
|
||||
let dims = vectors[0].len();
|
||||
let mut sum = DVector::zeros(dims);
|
||||
for v in vectors {
|
||||
if v.len() != dims {
|
||||
return Err(datafusion_common::DataFusionError::Execution(
|
||||
"vectors length not match: VEC_AVG".to_string(),
|
||||
));
|
||||
}
|
||||
let v_view = DVectorView::from_slice(&v, dims);
|
||||
sum += &v_view;
|
||||
}
|
||||
|
||||
*self.inner(dims) += sum;
|
||||
self.count += len;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Accumulator for VectorAvg {
|
||||
fn state(&mut self) -> Result<Vec<ScalarValue>> {
|
||||
let vector = match &self.sum {
|
||||
None => ScalarValue::Binary(None),
|
||||
Some(sum) => ScalarValue::Binary(Some(veclit_to_binlit(sum.as_slice()))),
|
||||
};
|
||||
Ok(vec![vector, ScalarValue::from(self.count)])
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
|
||||
self.update(values, true)
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
|
||||
self.update(states, false)
|
||||
}
|
||||
|
||||
fn evaluate(&mut self) -> Result<ScalarValue> {
|
||||
match &self.sum {
|
||||
None => Ok(ScalarValue::Binary(None)),
|
||||
Some(sum) => Ok(ScalarValue::Binary(Some(veclit_to_binlit(
|
||||
(sum / self.count as f32).as_slice(),
|
||||
)))),
|
||||
}
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
size_of_val(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::StringArray;
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::vectors::{ConstantVector, StringVector, Vector};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_update_batch() {
|
||||
// test updating an empty batch; expect nothing to be updated
|
||||
let mut vec_avg = VectorAvg::default();
|
||||
vec_avg.update_batch(&[]).unwrap();
|
||||
assert!(vec_avg.sum.is_none());
|
||||
assert_eq!(ScalarValue::Binary(None), vec_avg.evaluate().unwrap());
|
||||
|
||||
// test updating a batch of non-null values
|
||||
let mut vec_avg = VectorAvg::default();
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
Some("[4.0,5.0,6.0]".to_string()),
|
||||
]))];
|
||||
vec_avg.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
ScalarValue::Binary(Some(veclit_to_binlit(&[2.5, 3.5, 4.5]))),
|
||||
vec_avg.evaluate().unwrap()
|
||||
);
|
||||
|
||||
// test updating a single null value
|
||||
let mut vec_avg = VectorAvg::default();
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![Option::<String>::None]))];
|
||||
vec_avg.update_batch(&v).unwrap();
|
||||
assert_eq!(ScalarValue::Binary(None), vec_avg.evaluate().unwrap());
|
||||
|
||||
// test updating a batch with no null values
|
||||
let mut vec_avg = VectorAvg::default();
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
Some("[4.0,5.0,6.0]".to_string()),
|
||||
Some("[7.0,8.0,9.0]".to_string()),
|
||||
]))];
|
||||
vec_avg.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
ScalarValue::Binary(Some(veclit_to_binlit(&[4.0, 5.0, 6.0]))),
|
||||
vec_avg.evaluate().unwrap()
|
||||
);
|
||||
|
||||
// test updating a batch containing null values
|
||||
let mut vec_avg = VectorAvg::default();
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
None,
|
||||
Some("[7.0,8.0,9.0]".to_string()),
|
||||
]))];
|
||||
vec_avg.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
ScalarValue::Binary(Some(veclit_to_binlit(&[4.0, 5.0, 6.0]))),
|
||||
vec_avg.evaluate().unwrap()
|
||||
);
|
||||
|
||||
let mut vec_avg = VectorAvg::default();
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![
|
||||
None,
|
||||
Some("[4.0,5.0,6.0]".to_string()),
|
||||
Some("[7.0,8.0,9.0]".to_string()),
|
||||
]))];
|
||||
vec_avg.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
ScalarValue::Binary(Some(veclit_to_binlit(&[5.5, 6.5, 7.5]))),
|
||||
vec_avg.evaluate().unwrap()
|
||||
);
|
||||
|
||||
// test updating with a constant vector
|
||||
let mut vec_avg = VectorAvg::default();
|
||||
let v: Vec<ArrayRef> = vec![
|
||||
Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from_vec(vec!["[1.0,2.0,3.0]".to_string()])),
|
||||
4,
|
||||
))
|
||||
.to_arrow_array(),
|
||||
];
|
||||
vec_avg.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
ScalarValue::Binary(Some(veclit_to_binlit(&[1.0, 2.0, 3.0]))),
|
||||
vec_avg.evaluate().unwrap()
|
||||
);
|
||||
}
|
||||
}
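// Illustrative sketch (not part of the diff, arrow array constructors assumed):
// the two-column state (binary sum, u64 count) is what `merge_batch` consumes,
// so partial aggregates from different partitions combine into one average.
fn example_vec_avg_merge() -> datafusion_common::Result<()> {
    use arrow::array::UInt64Array;

    let mut acc = VectorAvg::default();
    let batch: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![Some(
        "[1.0,2.0]".to_string(),
    )]))];
    acc.update_batch(&batch)?;

    // A state produced by another partial accumulator: sum = [3.0, 4.0], count = 1.
    let other_sum_bytes = veclit_to_binlit(&[3.0, 4.0]);
    let other_sum: ArrayRef = Arc::new(BinaryArray::from_iter_values([other_sum_bytes.as_slice()]));
    let other_count: ArrayRef = Arc::new(UInt64Array::from(vec![1u64]));
    acc.merge_batch(&[other_sum, other_count])?;

    // (1 + 3) / 2 = 2.0 and (2 + 4) / 2 = 3.0 per dimension.
    assert_eq!(
        acc.evaluate()?,
        ScalarValue::Binary(Some(veclit_to_binlit(&[2.0, 3.0])))
    );
    Ok(())
}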
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
mod convert;
|
||||
mod distance;
|
||||
mod elem_avg;
|
||||
mod elem_product;
|
||||
mod elem_sum;
|
||||
pub mod impl_conv;
|
||||
@@ -64,6 +65,7 @@ impl VectorFunction {
|
||||
registry.register_scalar(vector_subvector::VectorSubvectorFunction::default());
|
||||
registry.register_scalar(elem_sum::ElemSumFunction::default());
|
||||
registry.register_scalar(elem_product::ElemProductFunction::default());
|
||||
registry.register_scalar(elem_avg::ElemAvgFunction::default());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
src/common/function/src/scalars/vector/elem_avg.rs (new file, 128 lines)
@@ -0,0 +1,128 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use datafusion::arrow::datatypes::DataType;
|
||||
use datafusion::logical_expr::ColumnarValue;
|
||||
use datafusion_common::ScalarValue;
|
||||
use datafusion_expr::type_coercion::aggregates::{BINARYS, STRINGS};
|
||||
use datafusion_expr::{ScalarFunctionArgs, Signature, TypeSignature, Volatility};
|
||||
use nalgebra::DVectorView;
|
||||
|
||||
use crate::function::Function;
|
||||
use crate::scalars::vector::{VectorCalculator, impl_conv};
|
||||
|
||||
const NAME: &str = "vec_elem_avg";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct ElemAvgFunction {
|
||||
signature: Signature,
|
||||
}
|
||||
|
||||
impl Default for ElemAvgFunction {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
signature: Signature::one_of(
|
||||
vec![
|
||||
TypeSignature::Uniform(1, STRINGS.to_vec()),
|
||||
TypeSignature::Uniform(1, BINARYS.to_vec()),
|
||||
TypeSignature::Uniform(1, vec![DataType::BinaryView]),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Function for ElemAvgFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
|
||||
Ok(DataType::Float32)
|
||||
}
|
||||
|
||||
fn signature(&self) -> &Signature {
|
||||
&self.signature
|
||||
}
|
||||
|
||||
fn invoke_with_args(
|
||||
&self,
|
||||
args: ScalarFunctionArgs,
|
||||
) -> datafusion_common::Result<ColumnarValue> {
|
||||
let body = |v0: &ScalarValue| -> datafusion_common::Result<ScalarValue> {
|
||||
let v0 =
|
||||
impl_conv::as_veclit(v0)?.map(|v0| DVectorView::from_slice(&v0, v0.len()).mean());
|
||||
Ok(ScalarValue::Float32(v0))
|
||||
};
|
||||
|
||||
let calculator = VectorCalculator {
|
||||
name: self.name(),
|
||||
func: body,
|
||||
};
|
||||
calculator.invoke_with_single_argument(args)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ElemAvgFunction {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", NAME.to_ascii_uppercase())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::StringViewArray;
|
||||
use arrow_schema::Field;
|
||||
use datafusion::arrow::array::{Array, AsArray};
|
||||
use datafusion::arrow::datatypes::Float32Type;
|
||||
use datafusion_common::config::ConfigOptions;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_elem_avg() {
|
||||
let func = ElemAvgFunction::default();
|
||||
|
||||
let input = Arc::new(StringViewArray::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
Some("[4.0,5.0,6.0]".to_string()),
|
||||
Some("[7.0,8.0,9.0]".to_string()),
|
||||
None,
|
||||
]));
|
||||
|
||||
let result = func
|
||||
.invoke_with_args(ScalarFunctionArgs {
|
||||
args: vec![ColumnarValue::Array(input.clone())],
|
||||
arg_fields: vec![],
|
||||
number_rows: input.len(),
|
||||
return_field: Arc::new(Field::new("x", DataType::Float32, true)),
|
||||
config_options: Arc::new(ConfigOptions::new()),
|
||||
})
|
||||
.and_then(|v| ColumnarValue::values_to_arrays(&[v]))
|
||||
.map(|mut a| a.remove(0))
|
||||
.unwrap();
|
||||
let result = result.as_primitive::<Float32Type>();
|
||||
|
||||
assert_eq!(result.len(), 4);
|
||||
assert_eq!(result.value(0), 2.0);
|
||||
assert_eq!(result.value(1), 5.0);
|
||||
assert_eq!(result.value(2), 8.0);
|
||||
assert!(result.is_null(3));
|
||||
}
|
||||
}
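// Illustrative sketch (not part of the diff): `vec_elem_avg` reduces each
// vector literal to the mean of its components via `DVectorView::mean`,
// e.g. "[1.0,2.0,3.0]" -> 2.0.
fn example_elem_mean() {
    let v = [1.0_f32, 2.0, 3.0];
    let mean = nalgebra::DVectorView::from_slice(&v, v.len()).mean();
    assert_eq!(mean, 2.0);
}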
|
||||
@@ -16,13 +16,17 @@ mod version;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_catalog::consts::{
|
||||
DEFAULT_PRIVATE_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME,
|
||||
};
|
||||
use datafusion::arrow::array::{ArrayRef, StringArray, as_boolean_array};
|
||||
use datafusion::catalog::TableFunction;
|
||||
use datafusion::common::ScalarValue;
|
||||
use datafusion::common::utils::SingleRowListArrayBuilder;
|
||||
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
|
||||
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, TypeSignature, Volatility};
|
||||
use datafusion_pg_catalog::pg_catalog::{self, PgCatalogStaticTables};
|
||||
use datatypes::arrow::datatypes::{DataType, Field};
|
||||
use derive_more::derive::Display;
|
||||
use version::PGVersionFunction;
|
||||
|
||||
use crate::function::{Function, find_function_context};
|
||||
@@ -35,7 +39,6 @@ const SESSION_USER_FUNCTION_NAME: &str = "session_user";
|
||||
const CURRENT_DATABASE_FUNCTION_NAME: &str = "current_database";
|
||||
|
||||
define_nullary_udf!(CurrentSchemaFunction);
|
||||
define_nullary_udf!(CurrentSchemasFunction);
|
||||
define_nullary_udf!(SessionUserFunction);
|
||||
define_nullary_udf!(CurrentDatabaseFunction);
|
||||
|
||||
@@ -115,6 +118,23 @@ impl Function for SessionUserFunction {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Display, Debug)]
|
||||
#[display("{}", self.name())]
|
||||
pub(super) struct CurrentSchemasFunction {
|
||||
signature: Signature,
|
||||
}
|
||||
|
||||
impl CurrentSchemasFunction {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
signature: Signature::new(
|
||||
TypeSignature::Exact(vec![DataType::Boolean]),
|
||||
Volatility::Stable,
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Function for CurrentSchemasFunction {
|
||||
fn name(&self) -> &str {
|
||||
CURRENT_SCHEMAS_FUNCTION_NAME
|
||||
@@ -122,9 +142,9 @@ impl Function for CurrentSchemasFunction {
|
||||
|
||||
fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
|
||||
Ok(DataType::List(Arc::new(Field::new(
|
||||
"x",
|
||||
DataType::Utf8View,
|
||||
false,
|
||||
"item",
|
||||
DataType::Utf8,
|
||||
true,
|
||||
))))
|
||||
}
|
||||
|
||||
@@ -143,9 +163,9 @@ impl Function for CurrentSchemasFunction {
|
||||
let mut values = vec!["public"];
|
||||
// include implicit schemas
|
||||
if input.value(0) {
|
||||
values.push("information_schema");
|
||||
values.push("pg_catalog");
|
||||
values.push("greptime_private");
|
||||
values.push(INFORMATION_SCHEMA_NAME);
|
||||
values.push(PG_CATALOG_NAME);
|
||||
values.push(DEFAULT_PRIVATE_SCHEMA_NAME);
|
||||
}
|
||||
|
||||
let list_array = SingleRowListArrayBuilder::new(Arc::new(StringArray::from(values)));
|
||||
@@ -165,7 +185,7 @@ impl PGCatalogFunction {
|
||||
|
||||
registry.register_scalar(PGVersionFunction::default());
|
||||
registry.register_scalar(CurrentSchemaFunction::default());
|
||||
registry.register_scalar(CurrentSchemasFunction::default());
|
||||
registry.register_scalar(CurrentSchemasFunction::new());
|
||||
registry.register_scalar(SessionUserFunction::default());
|
||||
registry.register_scalar(CurrentDatabaseFunction::default());
|
||||
registry.register(pg_catalog::format_type::create_format_type_udf());
|
||||
@@ -191,7 +211,10 @@ impl PGCatalogFunction {
|
||||
registry.register(pg_catalog::create_pg_get_userbyid_udf());
|
||||
registry.register(pg_catalog::create_pg_table_is_visible());
|
||||
registry.register(pg_catalog::pg_get_expr_udf::create_pg_get_expr_udf());
|
||||
// TODO(sunng87): upgrade datafusion to add
|
||||
//registry.register(pg_catalog::create_pg_encoding_to_char_udf());
|
||||
registry.register(pg_catalog::create_pg_encoding_to_char_udf());
|
||||
registry.register(pg_catalog::create_pg_relation_size_udf());
|
||||
registry.register(pg_catalog::create_pg_total_relation_size_udf());
|
||||
registry.register(pg_catalog::create_pg_stat_get_numscans());
|
||||
registry.register(pg_catalog::create_pg_get_constraintdef());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,14 +22,14 @@ use dashmap::DashMap;
|
||||
use dashmap::mapref::entry::Entry;
|
||||
use lazy_static::lazy_static;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use snafu::ResultExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tonic::transport::{
|
||||
Certificate, Channel as InnerChannel, ClientTlsConfig, Endpoint, Identity, Uri,
|
||||
};
|
||||
use tower::Service;
|
||||
|
||||
use crate::error::{CreateChannelSnafu, InvalidConfigFilePathSnafu, InvalidTlsConfigSnafu, Result};
|
||||
use crate::error::{CreateChannelSnafu, InvalidConfigFilePathSnafu, Result};
|
||||
|
||||
const RECYCLE_CHANNEL_INTERVAL_SECS: u64 = 60;
|
||||
pub const DEFAULT_GRPC_REQUEST_TIMEOUT_SECS: u64 = 10;
|
||||
@@ -91,57 +91,18 @@ impl ChannelManager {
|
||||
Default::default()
|
||||
}
|
||||
|
||||
pub fn with_config(config: ChannelConfig) -> Self {
|
||||
let inner = Inner::with_config(config);
|
||||
/// Unified constructor that accepts an optional TLS config.
/// Use [`load_tls_config`] to load the TLS config from the file system.
|
||||
pub fn with_config(config: ChannelConfig, tls_config: Option<ClientTlsConfig>) -> Self {
|
||||
let mut inner = Inner::with_config(config.clone());
|
||||
if let Some(tls_config) = tls_config {
|
||||
inner.client_tls_config = Some(tls_config);
|
||||
}
|
||||
Self {
|
||||
inner: Arc::new(inner),
|
||||
}
|
||||
}
|
||||
|
||||
/// Read tls cert and key files and create a ChannelManager with TLS config.
|
||||
pub fn with_tls_config(config: ChannelConfig) -> Result<Self> {
|
||||
let mut inner = Inner::with_config(config.clone());
|
||||
|
||||
// setup tls
|
||||
let path_config = config.client_tls.context(InvalidTlsConfigSnafu {
|
||||
msg: "no config input",
|
||||
})?;
|
||||
|
||||
if !path_config.enabled {
|
||||
// if TLS not enabled, just ignore other tls config
|
||||
// and not set `client_tls_config` hence not use TLS
|
||||
return Ok(Self {
|
||||
inner: Arc::new(inner),
|
||||
});
|
||||
}
|
||||
|
||||
let mut tls_config = ClientTlsConfig::new();
|
||||
|
||||
if let Some(server_ca) = path_config.server_ca_cert_path {
|
||||
let server_root_ca_cert =
|
||||
std::fs::read_to_string(server_ca).context(InvalidConfigFilePathSnafu)?;
|
||||
let server_root_ca_cert = Certificate::from_pem(server_root_ca_cert);
|
||||
tls_config = tls_config.ca_certificate(server_root_ca_cert);
|
||||
}
|
||||
|
||||
if let (Some(client_cert_path), Some(client_key_path)) =
|
||||
(&path_config.client_cert_path, &path_config.client_key_path)
|
||||
{
|
||||
let client_cert =
|
||||
std::fs::read_to_string(client_cert_path).context(InvalidConfigFilePathSnafu)?;
|
||||
let client_key =
|
||||
std::fs::read_to_string(client_key_path).context(InvalidConfigFilePathSnafu)?;
|
||||
let client_identity = Identity::from_pem(client_cert, client_key);
|
||||
tls_config = tls_config.identity(client_identity);
|
||||
}
|
||||
|
||||
inner.client_tls_config = Some(tls_config);
|
||||
|
||||
Ok(Self {
|
||||
inner: Arc::new(inner),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn config(&self) -> &ChannelConfig {
|
||||
&self.inner.config
|
||||
}
|
||||
@@ -287,6 +248,34 @@ impl ChannelManager {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_tls_config(tls_option: Option<&ClientTlsOption>) -> Result<Option<ClientTlsConfig>> {
|
||||
let path_config = match tls_option {
|
||||
Some(path_config) if path_config.enabled => path_config,
|
||||
_ => return Ok(None),
|
||||
};
|
||||
|
||||
let mut tls_config = ClientTlsConfig::new();
|
||||
|
||||
if let Some(server_ca) = &path_config.server_ca_cert_path {
|
||||
let server_root_ca_cert =
|
||||
std::fs::read_to_string(server_ca).context(InvalidConfigFilePathSnafu)?;
|
||||
let server_root_ca_cert = Certificate::from_pem(server_root_ca_cert);
|
||||
tls_config = tls_config.ca_certificate(server_root_ca_cert);
|
||||
}
|
||||
|
||||
if let (Some(client_cert_path), Some(client_key_path)) =
|
||||
(&path_config.client_cert_path, &path_config.client_key_path)
|
||||
{
|
||||
let client_cert =
|
||||
std::fs::read_to_string(client_cert_path).context(InvalidConfigFilePathSnafu)?;
|
||||
let client_key =
|
||||
std::fs::read_to_string(client_key_path).context(InvalidConfigFilePathSnafu)?;
|
||||
let client_identity = Identity::from_pem(client_cert, client_key);
|
||||
tls_config = tls_config.identity(client_identity);
|
||||
}
|
||||
Ok(Some(tls_config))
|
||||
}
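// Illustrative sketch (not part of the diff; file paths are hypothetical and
// `ClientTlsOption` is assumed to have exactly the four fields used above):
// the new two-step flow replaces `with_tls_config` -- load the optional TLS
// config first, then pass it to `with_config`.
fn example_channel_with_tls() -> Result<ChannelManager> {
    let config = ChannelConfig::new().client_tls_config(ClientTlsOption {
        enabled: true,
        server_ca_cert_path: Some("/etc/greptimedb/tls/ca.crt".to_string()),
        client_cert_path: Some("/etc/greptimedb/tls/client.crt".to_string()),
        client_key_path: Some("/etc/greptimedb/tls/client.key".to_string()),
    });
    let tls_config = load_tls_config(config.client_tls.as_ref())?;
    Ok(ChannelManager::with_config(config, tls_config))
}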
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ClientTlsOption {
|
||||
/// Whether to enable TLS for client.
|
||||
@@ -659,7 +648,7 @@ mod tests {
|
||||
.http2_adaptive_window(true)
|
||||
.tcp_keepalive(Duration::from_secs(2))
|
||||
.tcp_nodelay(true);
|
||||
let mgr = ChannelManager::with_config(config);
|
||||
let mgr = ChannelManager::with_config(config, None);
|
||||
|
||||
let res = mgr.build_endpoint("test_addr");
|
||||
|
||||
|
||||
@@ -12,14 +12,17 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_grpc::channel_manager::{ChannelConfig, ChannelManager, ClientTlsOption};
|
||||
use common_grpc::channel_manager::{
|
||||
ChannelConfig, ChannelManager, ClientTlsOption, load_tls_config,
|
||||
};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_mtls_config() {
|
||||
// test no config
|
||||
let config = ChannelConfig::new();
|
||||
let re = ChannelManager::with_tls_config(config);
|
||||
assert!(re.is_err());
|
||||
let re = load_tls_config(config.client_tls.as_ref());
|
||||
assert!(re.is_ok());
|
||||
assert!(re.unwrap().is_none());
|
||||
|
||||
// test wrong file
|
||||
let config = ChannelConfig::new().client_tls_config(ClientTlsOption {
|
||||
@@ -29,7 +32,7 @@ async fn test_mtls_config() {
|
||||
client_key_path: Some("tests/tls/wrong_client.key".to_string()),
|
||||
});
|
||||
|
||||
let re = ChannelManager::with_tls_config(config);
|
||||
let re = load_tls_config(config.client_tls.as_ref());
|
||||
assert!(re.is_err());
|
||||
|
||||
// test corrupted file content
|
||||
@@ -40,7 +43,9 @@ async fn test_mtls_config() {
|
||||
client_key_path: Some("tests/tls/corrupted".to_string()),
|
||||
});
|
||||
|
||||
let re = ChannelManager::with_tls_config(config).unwrap();
|
||||
let tls_config = load_tls_config(config.client_tls.as_ref()).unwrap();
|
||||
let re = ChannelManager::with_config(config, tls_config);
|
||||
|
||||
let re = re.get("127.0.0.1:0");
|
||||
assert!(re.is_err());
|
||||
|
||||
@@ -52,7 +57,8 @@ async fn test_mtls_config() {
|
||||
client_key_path: Some("tests/tls/client.key".to_string()),
|
||||
});
|
||||
|
||||
let re = ChannelManager::with_tls_config(config).unwrap();
|
||||
let tls_config = load_tls_config(config.client_tls.as_ref()).unwrap();
|
||||
let re = ChannelManager::with_config(config, tls_config);
|
||||
let re = re.get("127.0.0.1:0");
|
||||
let _ = re.unwrap();
|
||||
}
|
||||
|
||||
@@ -77,7 +77,10 @@ serde_json.workspace = true
|
||||
serde_with.workspace = true
|
||||
session.workspace = true
|
||||
snafu.workspace = true
|
||||
sqlx = { workspace = true, optional = true }
|
||||
sqlx = { workspace = true, features = [
|
||||
"mysql",
|
||||
"chrono",
|
||||
], optional = true }
|
||||
store-api.workspace = true
|
||||
strum.workspace = true
|
||||
table = { workspace = true, features = ["testing"] }
|
||||
|
||||
@@ -25,8 +25,7 @@ use store_api::region_engine::{RegionRole, RegionStatistic};
|
||||
use store_api::storage::RegionId;
|
||||
use table::metadata::TableId;
|
||||
|
||||
use crate::error;
|
||||
use crate::error::Result;
|
||||
use crate::error::{self, DeserializeFromJsonSnafu, Result};
|
||||
use crate::heartbeat::utils::get_datanode_workloads;
|
||||
|
||||
const DATANODE_STAT_PREFIX: &str = "__meta_datanode_stat";
|
||||
@@ -66,10 +65,12 @@ pub struct Stat {
|
||||
pub node_epoch: u64,
|
||||
/// The datanode workloads.
|
||||
pub datanode_workloads: DatanodeWorkloads,
|
||||
/// The GC statistics of the datanode.
|
||||
pub gc_stat: Option<GcStat>,
|
||||
}
|
||||
|
||||
/// The statistics of a region.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct RegionStat {
|
||||
/// The region_id.
|
||||
pub id: RegionId,
|
||||
@@ -126,7 +127,7 @@ pub trait TopicStatsReporter: Send + Sync {
|
||||
fn reportable_topics(&mut self) -> Vec<TopicStat>;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
pub enum RegionManifestInfo {
|
||||
Mito {
|
||||
manifest_version: u64,
|
||||
@@ -222,11 +223,12 @@ impl TryFrom<&HeartbeatRequest> for Stat {
|
||||
node_epoch,
|
||||
node_workloads,
|
||||
topic_stats,
|
||||
extensions,
|
||||
..
|
||||
} = value;
|
||||
|
||||
match (header, peer) {
|
||||
(Some(_header), Some(peer)) => {
|
||||
(Some(header), Some(peer)) => {
|
||||
let region_stats = region_stats
|
||||
.iter()
|
||||
.map(RegionStat::from)
|
||||
@@ -234,6 +236,14 @@ impl TryFrom<&HeartbeatRequest> for Stat {
|
||||
let topic_stats = topic_stats.iter().map(TopicStat::from).collect::<Vec<_>>();
|
||||
|
||||
let datanode_workloads = get_datanode_workloads(node_workloads.as_ref());
|
||||
|
||||
let gc_stat = GcStat::from_extensions(extensions).map_err(|err| {
|
||||
common_telemetry::error!(
|
||||
"Failed to deserialize GcStat from extensions: {}",
|
||||
err
|
||||
);
|
||||
header.clone()
|
||||
})?;
|
||||
Ok(Self {
|
||||
timestamp_millis: time_util::current_time_millis(),
|
||||
// datanode id
|
||||
@@ -247,6 +257,7 @@ impl TryFrom<&HeartbeatRequest> for Stat {
|
||||
topic_stats,
|
||||
node_epoch: *node_epoch,
|
||||
datanode_workloads,
|
||||
gc_stat,
|
||||
})
|
||||
}
|
||||
(header, _) => Err(header.clone()),
|
||||
@@ -319,6 +330,43 @@ impl From<&api::v1::meta::TopicStat> for TopicStat {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct GcStat {
|
||||
/// Number of GC tasks currently running on the datanode.
|
||||
pub running_gc_tasks: u32,
|
||||
/// The maximum number of concurrent GC tasks the datanode can handle.
|
||||
pub gc_concurrency: u32,
|
||||
}
|
||||
|
||||
impl GcStat {
|
||||
pub const GC_STAT_KEY: &str = "__gc_stat";
|
||||
|
||||
pub fn new(running_gc_tasks: u32, gc_concurrency: u32) -> Self {
|
||||
Self {
|
||||
running_gc_tasks,
|
||||
gc_concurrency,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_extensions(&self, extensions: &mut std::collections::HashMap<String, Vec<u8>>) {
|
||||
let bytes = serde_json::to_vec(self).unwrap_or_default();
|
||||
extensions.insert(Self::GC_STAT_KEY.to_string(), bytes);
|
||||
}
|
||||
|
||||
pub fn from_extensions(
|
||||
extensions: &std::collections::HashMap<String, Vec<u8>>,
|
||||
) -> Result<Option<Self>> {
|
||||
extensions
|
||||
.get(Self::GC_STAT_KEY)
|
||||
.map(|bytes| {
|
||||
serde_json::from_slice(bytes).with_context(|_| DeserializeFromJsonSnafu {
|
||||
input: String::from_utf8_lossy(bytes).to_string(),
|
||||
})
|
||||
})
|
||||
.transpose()
|
||||
}
|
||||
}
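// Illustrative sketch (not part of the diff): `GcStat` rides in the heartbeat
// `extensions` map as JSON under `GC_STAT_KEY` and is recovered on the other
// side with `from_extensions`.
fn example_gc_stat_roundtrip() -> Result<()> {
    let mut extensions = std::collections::HashMap::new();
    GcStat::new(2, 8).into_extensions(&mut extensions);

    let restored = GcStat::from_extensions(&extensions)?.expect("GC_STAT_KEY is present");
    assert_eq!(restored.running_gc_tasks, 2);
    assert_eq!(restored.gc_concurrency, 8);
    Ok(())
}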
|
||||
|
||||
/// The key of the datanode stat in the memory store.
|
||||
///
|
||||
/// The format is `__meta_datanode_stat-0-{node_id}`.
|
||||
|
||||
@@ -442,7 +442,7 @@ pub fn extract_column_metadatas(
|
||||
results: &mut [RegionResponse],
|
||||
key: &str,
|
||||
) -> Result<Option<Vec<ColumnMetadata>>> {
|
||||
let schemas = results
|
||||
let mut schemas = results
|
||||
.iter_mut()
|
||||
.map(|r| r.extensions.remove(key))
|
||||
.collect::<Vec<_>>();
|
||||
@@ -454,20 +454,24 @@ pub fn extract_column_metadatas(
|
||||
|
||||
// Verify all the physical schemas are the same
|
||||
// Safety: previous check ensures this vec is not empty
|
||||
let first = schemas.first().unwrap();
|
||||
ensure!(
|
||||
schemas.iter().all(|x| x == first),
|
||||
MetadataCorruptionSnafu {
|
||||
err_msg: "The table column metadata schemas from datanodes are not the same."
|
||||
}
|
||||
);
|
||||
let first_column_metadatas = schemas
|
||||
.swap_remove(0)
|
||||
.map(|first_bytes| ColumnMetadata::decode_list(&first_bytes).context(DecodeJsonSnafu))
|
||||
.transpose()?;
|
||||
|
||||
if let Some(first) = first {
|
||||
let column_metadatas = ColumnMetadata::decode_list(first).context(DecodeJsonSnafu)?;
|
||||
Ok(Some(column_metadatas))
|
||||
} else {
|
||||
Ok(None)
|
||||
for s in schemas {
|
||||
// Compare the decoded column metadata rather than the raw bytes, because the raw bytes include the extension map.
|
||||
let column_metadata = s
|
||||
.map(|bytes| ColumnMetadata::decode_list(&bytes).context(DecodeJsonSnafu))
|
||||
.transpose()?;
|
||||
ensure!(
|
||||
column_metadata == first_column_metadatas,
|
||||
MetadataCorruptionSnafu {
|
||||
err_msg: "The table column metadata schemas from datanodes are not the same."
|
||||
}
|
||||
);
|
||||
}
|
||||
Ok(first_column_metadatas)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::fmt::{Display, Formatter};
|
||||
use std::time::Duration;
|
||||
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
use store_api::storage::{FileRefsManifest, GcReport, RegionId, RegionNumber};
|
||||
use strum::Display;
|
||||
use table::metadata::TableId;
|
||||
use table::table_name::TableName;
|
||||
@@ -250,7 +250,7 @@ pub struct UpgradeRegion {
|
||||
/// `None` stands for no wait,
|
||||
/// it's helpful to verify whether the leader region is ready.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub replay_timeout: Option<Duration>,
|
||||
pub replay_timeout: Duration,
|
||||
/// The hint for replaying memtable.
|
||||
#[serde(default)]
|
||||
pub location_id: Option<u64>,
|
||||
@@ -417,6 +417,88 @@ where
|
||||
})
|
||||
}
|
||||
|
||||
/// Instruction to get file references for specified regions.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct GetFileRefs {
|
||||
/// List of region IDs to get file references for.
|
||||
pub region_ids: Vec<RegionId>,
|
||||
}
|
||||
|
||||
impl Display for GetFileRefs {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "GetFileRefs(region_ids={:?})", self.region_ids)
|
||||
}
|
||||
}
|
||||
|
||||
/// Instruction to trigger garbage collection for regions.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct GcRegions {
|
||||
/// The region IDs to perform GC on.
|
||||
pub regions: Vec<RegionId>,
|
||||
/// The file references manifest containing temporary file references.
|
||||
pub file_refs_manifest: FileRefsManifest,
|
||||
/// Whether to perform a full file listing to find orphan files.
|
||||
pub full_file_listing: bool,
|
||||
}
|
||||
|
||||
impl Display for GcRegions {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"GcRegion(regions={:?}, file_refs_count={}, full_file_listing={})",
|
||||
self.regions,
|
||||
self.file_refs_manifest.file_refs.len(),
|
||||
self.full_file_listing
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Reply for GetFileRefs instruction.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct GetFileRefsReply {
|
||||
/// The file references manifest.
|
||||
pub file_refs_manifest: FileRefsManifest,
|
||||
/// Whether the operation was successful.
|
||||
pub success: bool,
|
||||
/// Error message if any.
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
impl Display for GetFileRefsReply {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"GetFileRefsReply(success={}, file_refs_count={}, error={:?})",
|
||||
self.success,
|
||||
self.file_refs_manifest.file_refs.len(),
|
||||
self.error
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Reply for GC instruction.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct GcRegionsReply {
|
||||
pub result: Result<GcReport, String>,
|
||||
}
|
||||
|
||||
impl Display for GcRegionsReply {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"GcReply(result={})",
|
||||
match &self.result {
|
||||
Ok(report) => format!(
|
||||
"GcReport(deleted_files_count={}, need_retry_regions_count={})",
|
||||
report.deleted_files.len(),
|
||||
report.need_retry_regions.len()
|
||||
),
|
||||
Err(err) => format!("Err({})", err),
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Display, PartialEq)]
|
||||
pub enum Instruction {
|
||||
/// Opens regions.
|
||||
@@ -425,18 +507,23 @@ pub enum Instruction {
|
||||
/// Closes regions.
|
||||
#[serde(deserialize_with = "single_or_multiple_from", alias = "CloseRegion")]
|
||||
CloseRegions(Vec<RegionIdent>),
|
||||
/// Upgrades a region.
|
||||
UpgradeRegion(UpgradeRegion),
|
||||
/// Upgrades regions.
|
||||
#[serde(deserialize_with = "single_or_multiple_from", alias = "UpgradeRegion")]
|
||||
UpgradeRegions(Vec<UpgradeRegion>),
|
||||
#[serde(
|
||||
deserialize_with = "single_or_multiple_from",
|
||||
alias = "DowngradeRegion"
|
||||
)]
|
||||
/// Downgrades a region.
|
||||
/// Downgrades regions.
|
||||
DowngradeRegions(Vec<DowngradeRegion>),
|
||||
/// Invalidates batch cache.
|
||||
InvalidateCaches(Vec<CacheIdent>),
|
||||
/// Flushes regions.
|
||||
FlushRegions(FlushRegions),
|
||||
/// Gets file references for regions.
|
||||
GetFileRefs(GetFileRefs),
|
||||
/// Triggers garbage collection for regions.
|
||||
GcRegions(GcRegions),
|
||||
}
|
||||
|
||||
impl Instruction {
|
||||
@@ -473,9 +560,23 @@ impl Instruction {
|
||||
}
|
||||
|
||||
/// Converts the instruction into a [UpgradeRegion].
|
||||
pub fn into_upgrade_regions(self) -> Option<UpgradeRegion> {
|
||||
pub fn into_upgrade_regions(self) -> Option<Vec<UpgradeRegion>> {
|
||||
match self {
|
||||
Self::UpgradeRegion(upgrade_region) => Some(upgrade_region),
|
||||
Self::UpgradeRegions(upgrade_region) => Some(upgrade_region),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_get_file_refs(self) -> Option<GetFileRefs> {
|
||||
match self {
|
||||
Self::GetFileRefs(get_file_refs) => Some(get_file_refs),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_gc_regions(self) -> Option<GcRegions> {
|
||||
match self {
|
||||
Self::GcRegions(gc_regions) => Some(gc_regions),
|
||||
_ => None,
|
||||
}
|
||||
}
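// Illustrative sketch (not part of the diff): handlers dispatch on the new
// batched variant, so a single instruction now carries every region to upgrade.
fn example_dispatch(instruction: Instruction) {
    if let Some(upgrades) = instruction.into_upgrade_regions() {
        for upgrade in upgrades {
            // one `UpgradeRegion` per region; `region_id` identifies the target
            let _region_id = upgrade.region_id;
        }
    }
}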
|
||||
@@ -484,6 +585,10 @@ impl Instruction {
|
||||
/// The reply of [UpgradeRegion].
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
|
||||
pub struct UpgradeRegionReply {
|
||||
/// The [RegionId].
|
||||
/// For compatibility, it is defaulted to [RegionId::new(0, 0)].
|
||||
#[serde(default)]
|
||||
pub region_id: RegionId,
|
||||
/// Returns true if `last_entry_id` has been replayed to the latest.
|
||||
pub ready: bool,
|
||||
/// Indicates whether the region exists.
|
||||
@@ -535,6 +640,39 @@ where
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
|
||||
pub struct UpgradeRegionsReply {
|
||||
pub replies: Vec<UpgradeRegionReply>,
|
||||
}
|
||||
|
||||
impl UpgradeRegionsReply {
|
||||
pub fn new(replies: Vec<UpgradeRegionReply>) -> Self {
|
||||
Self { replies }
|
||||
}
|
||||
|
||||
pub fn single(reply: UpgradeRegionReply) -> Self {
|
||||
Self::new(vec![reply])
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(untagged)]
|
||||
enum UpgradeRegionsCompat {
|
||||
Single(UpgradeRegionReply),
|
||||
Multiple(UpgradeRegionsReply),
|
||||
}
|
||||
|
||||
fn upgrade_regions_compat_from<'de, D>(deserializer: D) -> Result<UpgradeRegionsReply, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let helper = UpgradeRegionsCompat::deserialize(deserializer)?;
|
||||
Ok(match helper {
|
||||
UpgradeRegionsCompat::Single(x) => UpgradeRegionsReply::new(vec![x]),
|
||||
UpgradeRegionsCompat::Multiple(reply) => reply,
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum InstructionReply {
|
||||
@@ -542,13 +680,19 @@ pub enum InstructionReply {
|
||||
OpenRegions(SimpleReply),
|
||||
#[serde(alias = "close_region")]
|
||||
CloseRegions(SimpleReply),
|
||||
UpgradeRegion(UpgradeRegionReply),
|
||||
#[serde(
|
||||
deserialize_with = "upgrade_regions_compat_from",
|
||||
alias = "upgrade_region"
|
||||
)]
|
||||
UpgradeRegions(UpgradeRegionsReply),
|
||||
#[serde(
|
||||
alias = "downgrade_region",
|
||||
deserialize_with = "downgrade_regions_compat_from"
|
||||
)]
|
||||
DowngradeRegions(DowngradeRegionsReply),
|
||||
FlushRegions(FlushRegionReply),
|
||||
GetFileRefs(GetFileRefsReply),
|
||||
GcRegions(GcRegionsReply),
|
||||
}
|
||||
|
||||
impl Display for InstructionReply {
|
||||
@@ -556,11 +700,15 @@ impl Display for InstructionReply {
|
||||
match self {
|
||||
Self::OpenRegions(reply) => write!(f, "InstructionReply::OpenRegions({})", reply),
|
||||
Self::CloseRegions(reply) => write!(f, "InstructionReply::CloseRegions({})", reply),
|
||||
Self::UpgradeRegion(reply) => write!(f, "InstructionReply::UpgradeRegion({})", reply),
|
||||
Self::UpgradeRegions(reply) => {
|
||||
write!(f, "InstructionReply::UpgradeRegions({:?})", reply.replies)
|
||||
}
|
||||
Self::DowngradeRegions(reply) => {
|
||||
write!(f, "InstructionReply::DowngradeRegions({:?})", reply)
|
||||
write!(f, "InstructionReply::DowngradeRegions({:?})", reply.replies)
|
||||
}
|
||||
Self::FlushRegions(reply) => write!(f, "InstructionReply::FlushRegions({})", reply),
|
||||
Self::GetFileRefs(reply) => write!(f, "InstructionReply::GetFileRefs({})", reply),
|
||||
Self::GcRegions(reply) => write!(f, "InstructionReply::GcRegions({})", reply),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -581,9 +729,9 @@ impl InstructionReply {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expect_upgrade_region_reply(self) -> UpgradeRegionReply {
|
||||
pub fn expect_upgrade_regions_reply(self) -> Vec<UpgradeRegionReply> {
|
||||
match self {
|
||||
Self::UpgradeRegion(reply) => reply,
|
||||
Self::UpgradeRegions(reply) => reply.replies,
|
||||
_ => panic!("Expected UpgradeRegion reply"),
|
||||
}
|
||||
}
|
||||
@@ -605,6 +753,10 @@ impl InstructionReply {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use store_api::storage::FileId;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
@@ -641,25 +793,58 @@ mod tests {
|
||||
serialized
|
||||
);
|
||||
|
||||
let downgrade_region = InstructionReply::DowngradeRegions(DowngradeRegionsReply::single(
|
||||
DowngradeRegionReply {
|
||||
let upgrade_region = Instruction::UpgradeRegions(vec![UpgradeRegion {
|
||||
region_id: RegionId::new(1024, 1),
|
||||
last_entry_id: None,
|
||||
metadata_last_entry_id: None,
|
||||
replay_timeout: Duration::from_millis(1000),
|
||||
location_id: None,
|
||||
replay_entry_id: None,
|
||||
metadata_replay_entry_id: None,
|
||||
}]);
|
||||
|
||||
let serialized = serde_json::to_string(&upgrade_region).unwrap();
|
||||
assert_eq!(
|
||||
r#"{"UpgradeRegions":[{"region_id":4398046511105,"last_entry_id":null,"metadata_last_entry_id":null,"replay_timeout":"1s","location_id":null}]}"#,
|
||||
serialized
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_instruction_reply() {
|
||||
let downgrade_region_reply = InstructionReply::DowngradeRegions(
|
||||
DowngradeRegionsReply::single(DowngradeRegionReply {
|
||||
region_id: RegionId::new(1024, 1),
|
||||
last_entry_id: None,
|
||||
metadata_last_entry_id: None,
|
||||
exists: true,
|
||||
error: None,
|
||||
},
|
||||
));
|
||||
}),
|
||||
);
|
||||
|
||||
let serialized = serde_json::to_string(&downgrade_region).unwrap();
|
||||
let serialized = serde_json::to_string(&downgrade_region_reply).unwrap();
|
||||
assert_eq!(
|
||||
r#"{"type":"downgrade_regions","replies":[{"region_id":4398046511105,"last_entry_id":null,"metadata_last_entry_id":null,"exists":true,"error":null}]}"#,
|
||||
serialized
|
||||
)
|
||||
);
|
||||
|
||||
let upgrade_region_reply =
|
||||
InstructionReply::UpgradeRegions(UpgradeRegionsReply::single(UpgradeRegionReply {
|
||||
region_id: RegionId::new(1024, 1),
|
||||
ready: true,
|
||||
exists: true,
|
||||
error: None,
|
||||
}));
|
||||
let serialized = serde_json::to_string(&upgrade_region_reply).unwrap();
|
||||
assert_eq!(
|
||||
r#"{"type":"upgrade_regions","replies":[{"region_id":4398046511105,"ready":true,"exists":true,"error":null}]}"#,
|
||||
serialized
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_deserialize_instruction() {
|
||||
// legacy open region instruction
|
||||
let open_region_instruction = r#"{"OpenRegion":{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}}"#;
|
||||
let open_region_instruction: Instruction =
|
||||
serde_json::from_str(open_region_instruction).unwrap();
|
||||
@@ -677,6 +862,7 @@ mod tests {
|
||||
)]);
|
||||
assert_eq!(open_region_instruction, open_region);
|
||||
|
||||
// legacy close region instruction
|
||||
let close_region_instruction = r#"{"CloseRegion":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}}"#;
|
||||
let close_region_instruction: Instruction =
|
||||
serde_json::from_str(close_region_instruction).unwrap();
|
||||
@@ -688,6 +874,7 @@ mod tests {
|
||||
}]);
|
||||
assert_eq!(close_region_instruction, close_region);
|
||||
|
||||
// legacy downgrade region instruction
|
||||
let downgrade_region_instruction = r#"{"DowngradeRegions":{"region_id":4398046511105,"flush_timeout":{"secs":1,"nanos":0}}}"#;
|
||||
let downgrade_region_instruction: Instruction =
|
||||
serde_json::from_str(downgrade_region_instruction).unwrap();
|
||||
@@ -697,6 +884,25 @@ mod tests {
|
||||
}]);
|
||||
assert_eq!(downgrade_region_instruction, downgrade_region);
|
||||
|
||||
// legacy upgrade region instruction
|
||||
let upgrade_region_instruction = r#"{"UpgradeRegion":{"region_id":4398046511105,"last_entry_id":null,"metadata_last_entry_id":null,"replay_timeout":"1s","location_id":null,"replay_entry_id":null,"metadata_replay_entry_id":null}}"#;
|
||||
let upgrade_region_instruction: Instruction =
|
||||
serde_json::from_str(upgrade_region_instruction).unwrap();
|
||||
let upgrade_region = Instruction::UpgradeRegions(vec![UpgradeRegion {
|
||||
region_id: RegionId::new(1024, 1),
|
||||
last_entry_id: None,
|
||||
metadata_last_entry_id: None,
|
||||
replay_timeout: Duration::from_millis(1000),
|
||||
location_id: None,
|
||||
replay_entry_id: None,
|
||||
metadata_replay_entry_id: None,
|
||||
}]);
|
||||
assert_eq!(upgrade_region_instruction, upgrade_region);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_deserialize_instruction_reply() {
|
||||
// legacy close region reply
|
||||
let close_region_instruction_reply =
|
||||
r#"{"result":true,"error":null,"type":"close_region"}"#;
|
||||
let close_region_instruction_reply: InstructionReply =
|
||||
@@ -707,6 +913,7 @@ mod tests {
|
||||
});
|
||||
assert_eq!(close_region_instruction_reply, close_region_reply);
|
||||
|
||||
// legacy open region reply
|
||||
let open_region_instruction_reply = r#"{"result":true,"error":null,"type":"open_region"}"#;
|
||||
let open_region_instruction_reply: InstructionReply =
|
||||
serde_json::from_str(open_region_instruction_reply).unwrap();
|
||||
@@ -716,6 +923,7 @@ mod tests {
|
||||
});
|
||||
assert_eq!(open_region_instruction_reply, open_region_reply);
|
||||
|
||||
// legacy downgrade region reply
|
||||
let downgrade_region_instruction_reply = r#"{"region_id":4398046511105,"last_entry_id":null,"metadata_last_entry_id":null,"exists":true,"error":null,"type":"downgrade_region"}"#;
|
||||
let downgrade_region_instruction_reply: InstructionReply =
|
||||
serde_json::from_str(downgrade_region_instruction_reply).unwrap();
|
||||
@@ -729,6 +937,19 @@ mod tests {
|
||||
}),
|
||||
);
|
||||
assert_eq!(downgrade_region_instruction_reply, downgrade_region_reply);
|
||||
|
||||
// legacy upgrade region reply
|
||||
let upgrade_region_instruction_reply = r#"{"region_id":4398046511105,"ready":true,"exists":true,"error":null,"type":"upgrade_region"}"#;
|
||||
let upgrade_region_instruction_reply: InstructionReply =
|
||||
serde_json::from_str(upgrade_region_instruction_reply).unwrap();
|
||||
let upgrade_region_reply =
|
||||
InstructionReply::UpgradeRegions(UpgradeRegionsReply::single(UpgradeRegionReply {
|
||||
region_id: RegionId::new(1024, 1),
|
||||
ready: true,
|
||||
exists: true,
|
||||
error: None,
|
||||
}));
|
||||
assert_eq!(upgrade_region_instruction_reply, upgrade_region_reply);
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -903,4 +1124,30 @@ mod tests {
|
||||
_ => panic!("Expected FlushRegions instruction"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_get_file_refs_instruction_reply() {
|
||||
let mut manifest = FileRefsManifest::default();
|
||||
let r0 = RegionId::new(1024, 1);
|
||||
let r1 = RegionId::new(1024, 2);
|
||||
manifest
|
||||
.file_refs
|
||||
.insert(r0, HashSet::from([FileId::random()]));
|
||||
manifest
|
||||
.file_refs
|
||||
.insert(r1, HashSet::from([FileId::random()]));
|
||||
manifest.manifest_version.insert(r0, 10);
|
||||
manifest.manifest_version.insert(r1, 20);
|
||||
|
||||
let instruction_reply = InstructionReply::GetFileRefs(GetFileRefsReply {
|
||||
file_refs_manifest: manifest,
|
||||
success: true,
|
||||
error: None,
|
||||
});
|
||||
|
||||
let serialized = serde_json::to_string(&instruction_reply).unwrap();
|
||||
let deserialized = serde_json::from_str(&serialized).unwrap();
|
||||
|
||||
assert_eq!(instruction_reply, deserialized);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,6 +164,25 @@ impl DatanodeTableManager {
|
||||
.transpose()
|
||||
}
|
||||
|
||||
pub async fn batch_get(
|
||||
&self,
|
||||
keys: &[DatanodeTableKey],
|
||||
) -> Result<HashMap<DatanodeTableKey, DatanodeTableValue>> {
|
||||
let req = BatchGetRequest::default().with_keys(keys.iter().map(|k| k.to_bytes()).collect());
|
||||
let resp = self.kv_backend.batch_get(req).await?;
|
||||
let values = resp
|
||||
.kvs
|
||||
.into_iter()
|
||||
.map(|kv| {
|
||||
Ok((
|
||||
DatanodeTableKey::from_bytes(&kv.key)?,
|
||||
DatanodeTableValue::try_from_raw_value(&kv.value)?,
|
||||
))
|
||||
})
|
||||
.collect::<Result<HashMap<_, _>>>()?;
|
||||
Ok(values)
|
||||
}
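A usage sketch for the new `batch_get`; the key constructor and the surrounding variable names are assumptions for illustration, not taken from this diff:

// Fetch several datanode-table mappings in one kv-backend round trip,
// then look the results up by key. Missing keys are simply absent from the map.
let keys = vec![
    DatanodeTableKey::new(1, 1024), // datanode 1, table 1024 (constructor assumed)
    DatanodeTableKey::new(2, 1024),
];
let values = datanode_table_manager.batch_get(&keys).await?;
for key in &keys {
    if let Some(value) = values.get(key) {
        println!("{:?} -> {:?}", key, value);
    }
}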
|
||||
|
||||
pub fn tables(
|
||||
&self,
|
||||
datanode_id: DatanodeId,
|
||||
|
||||
@@ -661,13 +661,32 @@ impl TableRouteStorage {
|
||||
|
||||
/// Returns batch of [`TableRouteValue`] that respects the order of `table_ids`.
|
||||
pub async fn batch_get(&self, table_ids: &[TableId]) -> Result<Vec<Option<TableRouteValue>>> {
|
||||
let mut table_routes = self.batch_get_inner(table_ids).await?;
|
||||
self.remap_routes_addresses(&mut table_routes).await?;
|
||||
let raw_table_routes = self.batch_get_inner(table_ids).await?;
|
||||
|
||||
Ok(table_routes)
|
||||
Ok(raw_table_routes
|
||||
.into_iter()
|
||||
.map(|v| v.map(|x| x.inner))
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn batch_get_inner(&self, table_ids: &[TableId]) -> Result<Vec<Option<TableRouteValue>>> {
|
||||
/// Returns batch of [`TableRouteValue`] wrapped with [`DeserializedValueWithBytes`].
|
||||
///
|
||||
/// The return value is a vector of [`Option<DeserializedValueWithBytes<TableRouteValue>>`].
|
||||
/// Note: This method remaps the addresses of the table routes, but does not update their raw byte representations.
|
||||
pub async fn batch_get_with_raw_bytes(
|
||||
&self,
|
||||
table_ids: &[TableId],
|
||||
) -> Result<Vec<Option<DeserializedValueWithBytes<TableRouteValue>>>> {
|
||||
let mut raw_table_routes = self.batch_get_inner(table_ids).await?;
|
||||
self.remap_routes_addresses(&mut raw_table_routes).await?;
|
||||
|
||||
Ok(raw_table_routes)
|
||||
}
|
||||
|
||||
async fn batch_get_inner(
|
||||
&self,
|
||||
table_ids: &[TableId],
|
||||
) -> Result<Vec<Option<DeserializedValueWithBytes<TableRouteValue>>>> {
|
||||
let keys = table_ids
|
||||
.iter()
|
||||
.map(|id| TableRouteKey::new(*id).to_bytes())
|
||||
@@ -685,7 +704,7 @@ impl TableRouteStorage {
|
||||
keys.into_iter()
|
||||
.map(|key| {
|
||||
if let Some(value) = kvs.get(&key) {
|
||||
Ok(Some(TableRouteValue::try_from_raw_value(value)?))
|
||||
Ok(Some(DeserializedValueWithBytes::from_inner_slice(value)?))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
@@ -695,14 +714,14 @@ impl TableRouteStorage {
|
||||
|
||||
async fn remap_routes_addresses(
|
||||
&self,
|
||||
table_routes: &mut [Option<TableRouteValue>],
|
||||
table_routes: &mut [Option<DeserializedValueWithBytes<TableRouteValue>>],
|
||||
) -> Result<()> {
|
||||
let keys = table_routes
|
||||
.iter()
|
||||
.flat_map(|table_route| {
|
||||
table_route
|
||||
.as_ref()
|
||||
.map(extract_address_keys)
|
||||
.map(|x| extract_address_keys(&x.inner))
|
||||
.unwrap_or_default()
|
||||
})
|
||||
.collect::<HashSet<_>>()
|
||||
|
||||
@@ -33,7 +33,7 @@ use crate::rpc::store::{
|
||||
|
||||
// The TopicRegionKey is a key for the topic-region mapping in the kvbackend.
|
||||
// The layout of the key is `__topic_region/{topic_name}/{region_id}`.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct TopicRegionKey<'a> {
|
||||
pub region_id: RegionId,
|
||||
pub topic: &'a str,
|
||||
|
||||
@@ -26,7 +26,6 @@ use datatypes::arrow::datatypes::{
|
||||
Int32Type, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
|
||||
TimestampSecondType,
|
||||
};
|
||||
use datatypes::schema::SchemaRef;
|
||||
|
||||
fn prepare_record_batch(rows: usize) -> RecordBatch {
|
||||
let schema = Schema::new(vec![
|
||||
@@ -56,14 +55,6 @@ fn prepare_record_batch(rows: usize) -> RecordBatch {
|
||||
RecordBatch::try_new(Arc::new(schema), columns).unwrap()
|
||||
}
|
||||
|
||||
fn iter_by_greptimedb_values(schema: SchemaRef, record_batch: RecordBatch) {
|
||||
let record_batch =
|
||||
common_recordbatch::RecordBatch::try_from_df_record_batch(schema, record_batch).unwrap();
|
||||
for row in record_batch.rows() {
|
||||
black_box(row);
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_by_loop_rows_and_columns(record_batch: RecordBatch) {
|
||||
for i in 0..record_batch.num_rows() {
|
||||
for column in record_batch.columns() {
|
||||
@@ -125,19 +116,6 @@ pub fn criterion_benchmark(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("iter_record_batch");
|
||||
|
||||
for rows in [1usize, 10, 100, 1_000, 10_000] {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("by_greptimedb_values", rows),
|
||||
&rows,
|
||||
|b, rows| {
|
||||
let record_batch = prepare_record_batch(*rows);
|
||||
let schema =
|
||||
Arc::new(datatypes::schema::Schema::try_from(record_batch.schema()).unwrap());
|
||||
b.iter(|| {
|
||||
iter_by_greptimedb_values(schema.clone(), record_batch.clone());
|
||||
})
|
||||
},
|
||||
);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("by_loop_rows_and_columns", rows),
|
||||
&rows,
|
||||
|
||||
@@ -193,6 +193,13 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Exceeded memory limit: {}", msg))]
|
||||
ExceedMemoryLimit {
|
||||
msg: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
@@ -229,6 +236,8 @@ impl ErrorExt for Error {
|
||||
Error::StreamTimeout { .. } => StatusCode::Cancelled,
|
||||
|
||||
Error::StreamCancelled { .. } => StatusCode::Cancelled,
|
||||
|
||||
Error::ExceedMemoryLimit { .. } => StatusCode::RuntimeResourcesExhausted,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -21,11 +21,14 @@ pub mod filter;
|
||||
mod recordbatch;
|
||||
pub mod util;
|
||||
|
||||
use std::fmt;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
|
||||
use adapter::RecordBatchMetrics;
|
||||
use arc_swap::ArcSwapOption;
|
||||
use common_base::readable_size::ReadableSize;
|
||||
pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::arrow::compute::SortOptions;
|
||||
pub use datatypes::arrow::record_batch::RecordBatch as DfRecordBatch;
|
||||
@@ -406,6 +409,399 @@ impl<S: Stream<Item = Result<RecordBatch>> + Unpin> Stream for RecordBatchStream
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory permit for a stream, providing privileged access or rate limiting.
|
||||
///
|
||||
/// The permit tracks whether this stream has privileged Top-K status.
|
||||
/// When dropped, it automatically releases any privileged slot it holds.
|
||||
pub struct MemoryPermit {
|
||||
tracker: QueryMemoryTracker,
|
||||
is_privileged: AtomicBool,
|
||||
}
|
||||
|
||||
impl MemoryPermit {
|
||||
/// Check if this permit currently has privileged status.
|
||||
pub fn is_privileged(&self) -> bool {
|
||||
self.is_privileged.load(Ordering::Acquire)
|
||||
}
|
||||
|
||||
/// Ensure this permit has privileged status by acquiring a slot if available.
|
||||
/// Returns true if privileged (either already privileged or just acquired privilege).
|
||||
fn ensure_privileged(&self) -> bool {
|
||||
if self.is_privileged.load(Ordering::Acquire) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Try to claim a privileged slot
|
||||
self.tracker
|
||||
.privileged_count
|
||||
.fetch_update(Ordering::AcqRel, Ordering::Acquire, |count| {
|
||||
if count < self.tracker.privileged_slots {
|
||||
Some(count + 1)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.map(|_| {
|
||||
self.is_privileged.store(true, Ordering::Release);
|
||||
true
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Track additional memory usage with this permit.
|
||||
/// Returns error if limit is exceeded.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `additional` - Additional memory size to track in bytes
|
||||
/// * `stream_tracked` - Total memory already tracked by this stream
|
||||
///
|
||||
/// # Behavior
|
||||
/// - Privileged streams: Can push global memory usage up to full limit
|
||||
/// - Standard-tier streams: Can push global memory usage up to limit * standard_tier_memory_fraction (default: 0.7)
|
||||
/// - Standard-tier streams automatically attempt to acquire privilege if slots become available
|
||||
/// - The configured limit is an absolute hard limit; no stream can exceed it
|
||||
pub fn track(&self, additional: usize, stream_tracked: usize) -> Result<()> {
|
||||
// Ensure privileged status if possible
|
||||
let is_privileged = self.ensure_privileged();
|
||||
|
||||
self.tracker
|
||||
.track_internal(additional, is_privileged, stream_tracked)
|
||||
}
|
||||
|
||||
/// Release tracked memory.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `amount` - Amount of memory to release in bytes
|
||||
pub fn release(&self, amount: usize) {
|
||||
self.tracker.release(amount);
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for MemoryPermit {
|
||||
fn drop(&mut self) {
|
||||
// Release privileged slot if we had one
|
||||
if self.is_privileged.load(Ordering::Acquire) {
|
||||
self.tracker
|
||||
.privileged_count
|
||||
.fetch_sub(1, Ordering::Release);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory tracker for RecordBatch streams. Clone to share the same limit across queries.
|
||||
///
|
||||
/// Implements a two-tier memory allocation strategy:
|
||||
/// - **Privileged tier**: First N streams (default: 20) can use up to the full memory limit
|
||||
/// - **Standard tier**: Remaining streams are restricted to a fraction of the limit (default: 70%)
|
||||
/// - Privilege is granted on a first-come-first-served basis
|
||||
/// - The configured limit is an absolute hard cap - no stream can exceed it
|
||||
#[derive(Clone)]
|
||||
pub struct QueryMemoryTracker {
|
||||
current: Arc<AtomicUsize>,
|
||||
limit: usize,
|
||||
standard_tier_memory_fraction: f64,
|
||||
privileged_count: Arc<AtomicUsize>,
|
||||
privileged_slots: usize,
|
||||
on_update: Option<Arc<dyn Fn(usize) + Send + Sync>>,
|
||||
on_reject: Option<Arc<dyn Fn() + Send + Sync>>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for QueryMemoryTracker {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("QueryMemoryTracker")
|
||||
.field("current", &self.current.load(Ordering::Acquire))
|
||||
.field("limit", &self.limit)
|
||||
.field(
|
||||
"standard_tier_memory_fraction",
|
||||
&self.standard_tier_memory_fraction,
|
||||
)
|
||||
.field(
|
||||
"privileged_count",
|
||||
&self.privileged_count.load(Ordering::Acquire),
|
||||
)
|
||||
.field("privileged_slots", &self.privileged_slots)
|
||||
.field("on_update", &self.on_update.is_some())
|
||||
.field("on_reject", &self.on_reject.is_some())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryMemoryTracker {
|
||||
// Default privileged slots when max_concurrent_queries is 0.
|
||||
const DEFAULT_PRIVILEGED_SLOTS: usize = 20;
|
||||
// Ratio for the privileged tier: 70% of queries get privileged access; the standard tier uses 70% of the memory limit.
|
||||
const DEFAULT_PRIVILEGED_TIER_RATIO: f64 = 0.7;
|
||||
|
||||
/// Create a new memory tracker with the given limit and max_concurrent_queries.
|
||||
/// Calculates privileged slots as 70% of max_concurrent_queries (or 20 if max_concurrent_queries is 0).
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `limit` - Maximum memory usage in bytes (hard limit for all streams). 0 means unlimited.
|
||||
/// * `max_concurrent_queries` - Maximum number of concurrent queries (0 = unlimited).
|
||||
pub fn new(limit: usize, max_concurrent_queries: usize) -> Self {
|
||||
let privileged_slots = Self::calculate_privileged_slots(max_concurrent_queries);
|
||||
Self::with_privileged_slots(limit, privileged_slots)
|
||||
}
|
||||
|
||||
/// Create a new memory tracker with custom privileged slots limit.
|
||||
pub fn with_privileged_slots(limit: usize, privileged_slots: usize) -> Self {
|
||||
Self::with_config(limit, privileged_slots, Self::DEFAULT_PRIVILEGED_TIER_RATIO)
|
||||
}
|
||||
|
||||
/// Create a new memory tracker with full configuration.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `limit` - Maximum memory usage in bytes (hard limit for all streams). 0 means unlimited.
|
||||
/// * `privileged_slots` - Maximum number of streams that can get privileged status.
|
||||
/// * `standard_tier_memory_fraction` - Memory fraction for standard-tier streams (range: [0.0, 1.0]).
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `standard_tier_memory_fraction` is not in the range [0.0, 1.0].
|
||||
pub fn with_config(
|
||||
limit: usize,
|
||||
privileged_slots: usize,
|
||||
standard_tier_memory_fraction: f64,
|
||||
) -> Self {
|
||||
assert!(
|
||||
(0.0..=1.0).contains(&standard_tier_memory_fraction),
|
||||
"standard_tier_memory_fraction must be in [0.0, 1.0], got {}",
|
||||
standard_tier_memory_fraction
|
||||
);
|
||||
|
||||
Self {
|
||||
current: Arc::new(AtomicUsize::new(0)),
|
||||
limit,
|
||||
standard_tier_memory_fraction,
|
||||
privileged_count: Arc::new(AtomicUsize::new(0)),
|
||||
privileged_slots,
|
||||
on_update: None,
|
||||
on_reject: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Register a new permit for memory tracking.
|
||||
/// The first `privileged_slots` permits get privileged status automatically.
|
||||
/// The returned permit can be shared across multiple streams of the same query.
|
||||
pub fn register_permit(&self) -> MemoryPermit {
|
||||
// Try to claim a privileged slot
|
||||
let is_privileged = self
|
||||
.privileged_count
|
||||
.fetch_update(Ordering::AcqRel, Ordering::Acquire, |count| {
|
||||
if count < self.privileged_slots {
|
||||
Some(count + 1)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.is_ok();
|
||||
|
||||
MemoryPermit {
|
||||
tracker: self.clone(),
|
||||
is_privileged: AtomicBool::new(is_privileged),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set a callback to be called whenever the usage changes successfully.
|
||||
/// The callback receives the new total usage in bytes.
|
||||
///
|
||||
/// # Note
|
||||
/// The callback is called after both successful `track()` and `release()` operations.
|
||||
/// It is called even when `limit == 0` (unlimited mode) to track actual usage.
|
||||
pub fn with_on_update<F>(mut self, on_update: F) -> Self
|
||||
where
|
||||
F: Fn(usize) + Send + Sync + 'static,
|
||||
{
|
||||
self.on_update = Some(Arc::new(on_update));
|
||||
self
|
||||
}
|
||||
|
||||
/// Set a callback to be called when memory allocation is rejected.
|
||||
///
|
||||
/// # Note
|
||||
/// This is only called when `track()` fails due to exceeding the limit.
|
||||
/// It is never called when `limit == 0` (unlimited mode).
|
||||
pub fn with_on_reject<F>(mut self, on_reject: F) -> Self
|
||||
where
|
||||
F: Fn() + Send + Sync + 'static,
|
||||
{
|
||||
self.on_reject = Some(Arc::new(on_reject));
|
||||
self
|
||||
}
|
||||
|
||||
/// Get the current memory usage in bytes.
|
||||
pub fn current(&self) -> usize {
|
||||
self.current.load(Ordering::Acquire)
|
||||
}
|
||||
|
||||
fn calculate_privileged_slots(max_concurrent_queries: usize) -> usize {
|
||||
if max_concurrent_queries == 0 {
|
||||
Self::DEFAULT_PRIVILEGED_SLOTS
|
||||
} else {
|
||||
((max_concurrent_queries as f64 * Self::DEFAULT_PRIVILEGED_TIER_RATIO) as usize).max(1)
|
||||
}
|
||||
}
|
||||
|
||||
/// Internal method to track additional memory usage.
|
||||
///
|
||||
/// Called by `MemoryPermit::track()`. Use `MemoryPermit::track()` instead of calling this directly.
|
||||
fn track_internal(
|
||||
&self,
|
||||
additional: usize,
|
||||
is_privileged: bool,
|
||||
stream_tracked: usize,
|
||||
) -> Result<()> {
|
||||
// Calculate effective global limit based on stream privilege
|
||||
// Privileged streams: can push global usage up to full limit
|
||||
// Standard-tier streams: can only push global usage up to fraction of limit
|
||||
let effective_limit = if is_privileged {
|
||||
self.limit
|
||||
} else {
|
||||
(self.limit as f64 * self.standard_tier_memory_fraction) as usize
|
||||
};
|
||||
|
||||
let mut new_total = 0;
|
||||
let result = self
|
||||
.current
|
||||
.fetch_update(Ordering::AcqRel, Ordering::Acquire, |current| {
|
||||
new_total = current.saturating_add(additional);
|
||||
|
||||
if self.limit == 0 {
|
||||
// Unlimited mode
|
||||
return Some(new_total);
|
||||
}
|
||||
|
||||
// Check if new global total exceeds effective limit
|
||||
// The configured limit is an absolute hard limit; no stream can exceed it
|
||||
if new_total <= effective_limit {
|
||||
Some(new_total)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
match result {
|
||||
Ok(_) => {
|
||||
if let Some(callback) = &self.on_update {
|
||||
callback(new_total);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Err(current) => {
|
||||
if let Some(callback) = &self.on_reject {
|
||||
callback();
|
||||
}
|
||||
let msg = format!(
|
||||
"{} requested, {} used globally ({}%), {} used by this stream (privileged: {}), effective limit: {} ({}%), hard limit: {}",
|
||||
ReadableSize(additional as u64),
|
||||
ReadableSize(current as u64),
|
||||
if self.limit > 0 {
|
||||
current * 100 / self.limit
|
||||
} else {
|
||||
0
|
||||
},
|
||||
ReadableSize(stream_tracked as u64),
|
||||
is_privileged,
|
||||
ReadableSize(effective_limit as u64),
|
||||
if self.limit > 0 {
|
||||
effective_limit * 100 / self.limit
|
||||
} else {
|
||||
0
|
||||
},
|
||||
ReadableSize(self.limit as u64)
|
||||
);
|
||||
error::ExceedMemoryLimitSnafu { msg }.fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Release tracked memory.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `amount` - Amount of memory to release in bytes
|
||||
pub fn release(&self, amount: usize) {
|
||||
if let Ok(old_value) =
|
||||
self.current
|
||||
.fetch_update(Ordering::AcqRel, Ordering::Acquire, |current| {
|
||||
Some(current.saturating_sub(amount))
|
||||
})
|
||||
&& let Some(callback) = &self.on_update
|
||||
{
|
||||
callback(old_value.saturating_sub(amount));
|
||||
}
|
||||
}
|
||||
}
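To make the tier arithmetic concrete, the numbers below follow directly from the defaults and formulas shown above (standard-tier fraction 0.7, default slot count 20); this is a worked example, not extra code in the change:

// With limit = 1000 bytes:
//   privileged stream    -> may push global usage up to 1000 (the full limit)
//   standard-tier stream -> may push global usage up to 1000 * 0.7 = 700
// Privileged slot count derived from max_concurrent_queries:
//   max_concurrent_queries = 100 -> (100.0 * 0.7) as usize = 70 slots
//   max_concurrent_queries = 1   -> ((0.7) as usize).max(1) = 1 slot
//   max_concurrent_queries = 0   -> DEFAULT_PRIVILEGED_SLOTS = 20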
|
||||
|
||||
/// A wrapper stream that tracks memory usage of RecordBatches.
|
||||
pub struct MemoryTrackedStream {
|
||||
inner: SendableRecordBatchStream,
|
||||
permit: Arc<MemoryPermit>,
|
||||
// Total tracked size, released when stream drops.
|
||||
total_tracked: usize,
|
||||
}
|
||||
|
||||
impl MemoryTrackedStream {
|
||||
pub fn new(inner: SendableRecordBatchStream, permit: Arc<MemoryPermit>) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
permit,
|
||||
total_tracked: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Stream for MemoryTrackedStream {
|
||||
type Item = Result<RecordBatch>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
match Pin::new(&mut self.inner).poll_next(cx) {
|
||||
Poll::Ready(Some(Ok(batch))) => {
|
||||
let additional = batch
|
||||
.columns()
|
||||
.iter()
|
||||
.map(|c| c.memory_size())
|
||||
.sum::<usize>();
|
||||
|
||||
if let Err(e) = self.permit.track(additional, self.total_tracked) {
|
||||
return Poll::Ready(Some(Err(e)));
|
||||
}
|
||||
|
||||
self.total_tracked += additional;
|
||||
|
||||
Poll::Ready(Some(Ok(batch)))
|
||||
}
|
||||
Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))),
|
||||
Poll::Ready(None) => Poll::Ready(None),
|
||||
Poll::Pending => Poll::Pending,
|
||||
}
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
self.inner.size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for MemoryTrackedStream {
|
||||
fn drop(&mut self) {
|
||||
if self.total_tracked > 0 {
|
||||
self.permit.release(self.total_tracked);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RecordBatchStream for MemoryTrackedStream {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.inner.schema()
|
||||
}
|
||||
|
||||
fn output_ordering(&self) -> Option<&[OrderOption]> {
|
||||
self.inner.output_ordering()
|
||||
}
|
||||
|
||||
fn metrics(&self) -> Option<RecordBatchMetrics> {
|
||||
self.inner.metrics()
|
||||
}
|
||||
}
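A wiring sketch for the pieces above; `tracker` and `upstream` are assumed to exist in the caller (in the datanode they would come from the server configuration and the query's underlying stream):

// Register one permit per query and share it across that query's streams,
// then wrap each stream so its batches count against the shared limit.
let permit = Arc::new(tracker.register_permit());
let tracked_stream: SendableRecordBatchStream =
    Box::pin(MemoryTrackedStream::new(upstream, permit.clone()));
// Dropping the stream releases its tracked bytes; dropping the last clone of
// the permit releases any privileged slot it holds.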
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
@@ -496,4 +892,157 @@ mod tests {
|
||||
assert_eq!(collected[0], batch1);
|
||||
assert_eq!(collected[1], batch2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_memory_tracker_basic() {
|
||||
let tracker = Arc::new(QueryMemoryTracker::new(1000, 0));
|
||||
|
||||
// Register first stream - should get privileged status
|
||||
let permit1 = tracker.register_permit();
|
||||
assert!(permit1.is_privileged());
|
||||
|
||||
// Privileged stream can use up to limit
|
||||
assert!(permit1.track(500, 0).is_ok());
|
||||
assert_eq!(tracker.current(), 500);
|
||||
|
||||
// Register second stream - also privileged
|
||||
let permit2 = tracker.register_permit();
|
||||
assert!(permit2.is_privileged());
|
||||
// Can add more but cannot exceed hard limit (1000)
|
||||
assert!(permit2.track(400, 0).is_ok());
|
||||
assert_eq!(tracker.current(), 900);
|
||||
|
||||
permit1.release(500);
|
||||
permit2.release(400);
|
||||
assert_eq!(tracker.current(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_memory_tracker_privileged_limit() {
|
||||
// Privileged slots = 2 for easy testing
|
||||
// Limit: 1000, standard-tier fraction: 0.7 (default)
|
||||
// Privileged can push global to 1000, standard-tier can push global to 700
|
||||
let tracker = Arc::new(QueryMemoryTracker::with_privileged_slots(1000, 2));
|
||||
|
||||
// First 2 streams are privileged
|
||||
let permit1 = tracker.register_permit();
|
||||
let permit2 = tracker.register_permit();
|
||||
assert!(permit1.is_privileged());
|
||||
assert!(permit2.is_privileged());
|
||||
|
||||
// Third stream is standard-tier (not privileged)
|
||||
let permit3 = tracker.register_permit();
|
||||
assert!(!permit3.is_privileged());
|
||||
|
||||
// Privileged stream uses some memory
|
||||
assert!(permit1.track(300, 0).is_ok());
|
||||
assert_eq!(tracker.current(), 300);
|
||||
|
||||
// Standard-tier can add up to 400 (total becomes 700, its effective limit)
|
||||
assert!(permit3.track(400, 0).is_ok());
|
||||
assert_eq!(tracker.current(), 700);
|
||||
|
||||
// Standard-tier stream cannot push global beyond 700
|
||||
let err = permit3.track(100, 400).unwrap_err();
|
||||
let err_msg = err.to_string();
|
||||
assert!(err_msg.contains("400B used by this stream"));
|
||||
assert!(err_msg.contains("effective limit: 700B (70%)"));
|
||||
assert!(err_msg.contains("700B used globally (70%)"));
|
||||
assert_eq!(tracker.current(), 700);
|
||||
|
||||
permit1.release(300);
|
||||
permit3.release(400);
|
||||
assert_eq!(tracker.current(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_memory_tracker_promotion() {
|
||||
// Privileged slots = 1 for easy testing
|
||||
let tracker = Arc::new(QueryMemoryTracker::with_privileged_slots(1000, 1));
|
||||
|
||||
// First stream is privileged
|
||||
let permit1 = tracker.register_permit();
|
||||
assert!(permit1.is_privileged());
|
||||
|
||||
// Second stream is standard-tier (can only use 500)
|
||||
let permit2 = tracker.register_permit();
|
||||
assert!(!permit2.is_privileged());
|
||||
|
||||
// Standard-tier can only track 500
|
||||
assert!(permit2.track(400, 0).is_ok());
|
||||
assert_eq!(tracker.current(), 400);
|
||||
|
||||
// Drop first permit to release privileged slot
|
||||
drop(permit1);
|
||||
|
||||
// Second stream can now be promoted and use more memory
|
||||
assert!(permit2.track(500, 400).is_ok());
|
||||
assert!(permit2.is_privileged());
|
||||
assert_eq!(tracker.current(), 900);
|
||||
|
||||
permit2.release(900);
|
||||
assert_eq!(tracker.current(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_memory_tracker_privileged_hard_limit() {
|
||||
// Test that the configured limit is an absolute hard limit for all streams
|
||||
// Privileged: can use full limit (1000)
|
||||
// Standard-tier: can use 0.7x limit (700 with defaults)
|
||||
let tracker = Arc::new(QueryMemoryTracker::new(1000, 0));
|
||||
|
||||
let permit1 = tracker.register_permit();
|
||||
assert!(permit1.is_privileged());
|
||||
|
||||
// Privileged can use up to full limit (1000)
|
||||
assert!(permit1.track(900, 0).is_ok());
|
||||
assert_eq!(tracker.current(), 900);
|
||||
|
||||
// Privileged cannot exceed hard limit (1000)
|
||||
assert!(permit1.track(200, 900).is_err());
|
||||
assert_eq!(tracker.current(), 900);
|
||||
|
||||
// Can add within hard limit
|
||||
assert!(permit1.track(100, 900).is_ok());
|
||||
assert_eq!(tracker.current(), 1000);
|
||||
|
||||
// Cannot exceed even by 1 byte
|
||||
assert!(permit1.track(1, 1000).is_err());
|
||||
assert_eq!(tracker.current(), 1000);
|
||||
|
||||
permit1.release(1000);
|
||||
assert_eq!(tracker.current(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_memory_tracker_standard_tier_fraction() {
|
||||
// Test standard-tier streams use fraction of limit
|
||||
// Limit: 1000, default fraction: 0.7, so standard-tier can use 700
|
||||
let tracker = Arc::new(QueryMemoryTracker::with_privileged_slots(1000, 1));
|
||||
|
||||
let permit1 = tracker.register_permit();
|
||||
assert!(permit1.is_privileged());
|
||||
|
||||
let permit2 = tracker.register_permit();
|
||||
assert!(!permit2.is_privileged());
|
||||
|
||||
// Standard-tier can use up to 700 (1000 * 0.7 default)
|
||||
assert!(permit2.track(600, 0).is_ok());
|
||||
assert_eq!(tracker.current(), 600);
|
||||
|
||||
// Cannot exceed standard-tier limit (700)
|
||||
assert!(permit2.track(200, 600).is_err());
|
||||
assert_eq!(tracker.current(), 600);
|
||||
|
||||
// Can add within standard-tier limit
|
||||
assert!(permit2.track(100, 600).is_ok());
|
||||
assert_eq!(tracker.current(), 700);
|
||||
|
||||
// Cannot exceed standard-tier limit
|
||||
assert!(permit2.track(1, 700).is_err());
|
||||
assert_eq!(tracker.current(), 700);
|
||||
|
||||
permit2.release(700);
|
||||
assert_eq!(tracker.current(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,6 @@ use datafusion_common::arrow::datatypes::{DataType as ArrowDataType, SchemaRef a
|
||||
use datatypes::arrow::array::RecordBatchOptions;
|
||||
use datatypes::prelude::DataType;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{Helper, VectorRef};
|
||||
use serde::ser::{Error, SerializeStruct};
|
||||
use serde::{Serialize, Serializer};
|
||||
@@ -194,11 +193,6 @@ impl RecordBatch {
|
||||
self.df_record_batch.num_rows()
|
||||
}
|
||||
|
||||
/// Create an iterator to traverse the data by row
|
||||
pub fn rows(&self) -> RecordBatchRowIterator<'_> {
|
||||
RecordBatchRowIterator::new(self)
|
||||
}
|
||||
|
||||
pub fn column_vectors(
|
||||
&self,
|
||||
table_name: &str,
|
||||
@@ -277,44 +271,6 @@ impl Serialize for RecordBatch {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RecordBatchRowIterator<'a> {
|
||||
record_batch: &'a RecordBatch,
|
||||
rows: usize,
|
||||
columns: usize,
|
||||
row_cursor: usize,
|
||||
}
|
||||
|
||||
impl<'a> RecordBatchRowIterator<'a> {
|
||||
fn new(record_batch: &'a RecordBatch) -> RecordBatchRowIterator<'a> {
|
||||
RecordBatchRowIterator {
|
||||
record_batch,
|
||||
rows: record_batch.df_record_batch.num_rows(),
|
||||
columns: record_batch.df_record_batch.num_columns(),
|
||||
row_cursor: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for RecordBatchRowIterator<'_> {
|
||||
type Item = Vec<Value>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.row_cursor == self.rows {
|
||||
None
|
||||
} else {
|
||||
let mut row = Vec::with_capacity(self.columns);
|
||||
|
||||
for col in 0..self.columns {
|
||||
let column = self.record_batch.column(col);
|
||||
row.push(column.get(self.row_cursor));
|
||||
}
|
||||
|
||||
self.row_cursor += 1;
|
||||
Some(row)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Merges multiple record batches into a single one.
|
||||
pub fn merge_record_batches(schema: SchemaRef, batches: &[RecordBatch]) -> Result<RecordBatch> {
|
||||
let batches_len = batches.len();
|
||||
@@ -349,7 +305,9 @@ pub fn merge_record_batches(schema: SchemaRef, batches: &[RecordBatch]) -> Resul
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
|
||||
use datatypes::arrow::array::{AsArray, UInt32Array};
|
||||
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema, UInt32Type};
|
||||
use datatypes::arrow_array::StringArray;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::vectors::{StringVector, UInt32Vector};
|
||||
@@ -407,64 +365,6 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_record_batch_visitor() {
|
||||
let column_schemas = vec![
|
||||
ColumnSchema::new("numbers", ConcreteDataType::uint32_datatype(), false),
|
||||
ColumnSchema::new("strings", ConcreteDataType::string_datatype(), true),
|
||||
];
|
||||
let schema = Arc::new(Schema::new(column_schemas));
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
Arc::new(UInt32Vector::from_slice(vec![1, 2, 3, 4])),
|
||||
Arc::new(StringVector::from(vec![
|
||||
None,
|
||||
Some("hello"),
|
||||
Some("greptime"),
|
||||
None,
|
||||
])),
|
||||
];
|
||||
let recordbatch = RecordBatch::new(schema, columns).unwrap();
|
||||
|
||||
let mut record_batch_iter = recordbatch.rows();
|
||||
assert_eq!(
|
||||
vec![Value::UInt32(1), Value::Null],
|
||||
record_batch_iter
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.collect::<Vec<Value>>()
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
vec![Value::UInt32(2), Value::String("hello".into())],
|
||||
record_batch_iter
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.collect::<Vec<Value>>()
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
vec![Value::UInt32(3), Value::String("greptime".into())],
|
||||
record_batch_iter
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.collect::<Vec<Value>>()
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
vec![Value::UInt32(4), Value::Null],
|
||||
record_batch_iter
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.collect::<Vec<Value>>()
|
||||
);
|
||||
|
||||
assert!(record_batch_iter.next().is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_record_batch_slice() {
|
||||
let column_schemas = vec![
|
||||
@@ -483,26 +383,16 @@ mod tests {
|
||||
];
|
||||
let recordbatch = RecordBatch::new(schema, columns).unwrap();
|
||||
let recordbatch = recordbatch.slice(1, 2).expect("recordbatch slice");
|
||||
let mut record_batch_iter = recordbatch.rows();
|
||||
assert_eq!(
|
||||
vec![Value::UInt32(2), Value::String("hello".into())],
|
||||
record_batch_iter
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.collect::<Vec<Value>>()
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
vec![Value::UInt32(3), Value::String("greptime".into())],
|
||||
record_batch_iter
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.collect::<Vec<Value>>()
|
||||
);
|
||||
let expected = &UInt32Array::from_iter_values([2u32, 3]);
|
||||
let array = recordbatch.column(0).to_arrow_array();
|
||||
let actual = array.as_primitive::<UInt32Type>();
|
||||
assert_eq!(expected, actual);
|
||||
|
||||
assert!(record_batch_iter.next().is_none());
|
||||
let expected = &StringArray::from(vec!["hello", "greptime"]);
|
||||
let array = recordbatch.column(1).to_arrow_array();
|
||||
let actual = array.as_string::<i32>();
|
||||
assert_eq!(expected, actual);
|
||||
|
||||
assert!(recordbatch.slice(1, 5).is_err());
|
||||
}
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Display;
|
||||
use std::str::FromStr;
|
||||
|
||||
use chrono::{FixedOffset, TimeZone};
|
||||
use chrono_tz::{OffsetComponents, Tz};
|
||||
@@ -102,7 +101,7 @@ impl Timezone {
|
||||
.parse::<u32>()
|
||||
.context(ParseOffsetStrSnafu { raw: tz_string })?;
|
||||
Self::hours_mins_opt(hrs, mins)
|
||||
} else if let Ok(tz) = Tz::from_str(tz_string) {
|
||||
} else if let Ok(tz) = Tz::from_str_insensitive(tz_string) {
|
||||
Ok(Self::Named(tz))
|
||||
} else {
|
||||
ParseTimezoneNameSnafu { raw: tz_string }.fail()
|
||||
@@ -203,6 +202,10 @@ mod tests {
|
||||
Timezone::Named(Tz::Asia__Shanghai),
|
||||
Timezone::from_tz_string("Asia/Shanghai").unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
Timezone::Named(Tz::Asia__Shanghai),
|
||||
Timezone::from_tz_string("Asia/ShangHai").unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
Timezone::Named(Tz::UTC),
|
||||
Timezone::from_tz_string("UTC").unwrap()
|
||||
|
||||
@@ -11,7 +11,7 @@ workspace = true
|
||||
codec = ["dep:serde"]
|
||||
|
||||
[dependencies]
|
||||
const_format = "0.2"
|
||||
const_format.workspace = true
|
||||
serde = { workspace = true, optional = true }
|
||||
shadow-rs = { version = "1.2.1", default-features = false }
|
||||
|
||||
|
||||
@@ -522,6 +522,7 @@ impl DatanodeBuilder {
|
||||
file_ref_manager,
|
||||
partition_expr_fetcher.clone(),
|
||||
plugins,
|
||||
opts.max_concurrent_queries,
|
||||
);
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
@@ -564,6 +565,7 @@ impl DatanodeBuilder {
|
||||
file_ref_manager,
|
||||
partition_expr_fetcher,
|
||||
plugins,
|
||||
opts.max_concurrent_queries,
|
||||
);
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
@@ -585,6 +587,7 @@ impl DatanodeBuilder {
|
||||
file_ref_manager,
|
||||
partition_expr_fetcher.clone(),
|
||||
plugins,
|
||||
opts.max_concurrent_queries,
|
||||
);
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
|
||||
@@ -322,6 +322,21 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to run gc for region {}", region_id))]
|
||||
GcMitoEngine {
|
||||
region_id: RegionId,
|
||||
source: mito2::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid arguments for GC: {}", msg))]
|
||||
InvalidGcArgs {
|
||||
msg: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to list SST entries from storage"))]
|
||||
ListStorageSsts {
|
||||
#[snafu(implicit)]
|
||||
@@ -446,9 +461,11 @@ impl ErrorExt for Error {
|
||||
|
||||
AsyncTaskExecute { source, .. } => source.status_code(),
|
||||
|
||||
CreateDir { .. } | RemoveDir { .. } | ShutdownInstance { .. } | DataFusion { .. } => {
|
||||
StatusCode::Internal
|
||||
}
|
||||
CreateDir { .. }
|
||||
| RemoveDir { .. }
|
||||
| ShutdownInstance { .. }
|
||||
| DataFusion { .. }
|
||||
| InvalidGcArgs { .. } => StatusCode::Internal,
|
||||
|
||||
RegionNotFound { .. } => StatusCode::RegionNotFound,
|
||||
RegionNotReady { .. } => StatusCode::RegionNotReady,
|
||||
@@ -466,7 +483,7 @@ impl ErrorExt for Error {
|
||||
StopRegionEngine { source, .. } => source.status_code(),
|
||||
|
||||
FindLogicalRegions { source, .. } => source.status_code(),
|
||||
BuildMitoEngine { source, .. } => source.status_code(),
|
||||
BuildMitoEngine { source, .. } | GcMitoEngine { source, .. } => source.status_code(),
|
||||
BuildMetricEngine { source, .. } => source.status_code(),
|
||||
ListStorageSsts { source, .. } => source.status_code(),
|
||||
ConcurrentQueryLimiterClosed { .. } | ConcurrentQueryLimiterTimeout { .. } => {
|
||||
|
||||
@@ -36,14 +36,14 @@ use common_workload::DatanodeWorkloadType;
|
||||
use meta_client::MetaClientRef;
|
||||
use meta_client::client::{HeartbeatSender, MetaClient};
|
||||
use servers::addrs;
|
||||
use snafu::ResultExt;
|
||||
use snafu::{OptionExt as _, ResultExt};
|
||||
use tokio::sync::{Notify, mpsc};
|
||||
use tokio::time::Instant;
|
||||
|
||||
use self::handler::RegionHeartbeatResponseHandler;
|
||||
use crate::alive_keeper::{CountdownTaskHandlerExtRef, RegionAliveKeeper};
|
||||
use crate::config::DatanodeOptions;
|
||||
use crate::error::{self, MetaClientInitSnafu, Result};
|
||||
use crate::error::{self, MetaClientInitSnafu, RegionEngineNotFoundSnafu, Result};
|
||||
use crate::event_listener::RegionServerEventReceiver;
|
||||
use crate::metrics::{self, HEARTBEAT_RECV_COUNT, HEARTBEAT_SENT_COUNT};
|
||||
use crate::region_server::RegionServer;
|
||||
@@ -242,12 +242,18 @@ impl HeartbeatTask {
|
||||
let total_cpu_millicores = self.resource_stat.get_total_cpu_millicores();
|
||||
let total_memory_bytes = self.resource_stat.get_total_memory_bytes();
|
||||
let resource_stat = self.resource_stat.clone();
|
||||
let gc_limiter = self
|
||||
.region_server
|
||||
.mito_engine()
|
||||
.context(RegionEngineNotFoundSnafu { name: "mito" })?
|
||||
.gc_limiter();
|
||||
|
||||
common_runtime::spawn_hb(async move {
|
||||
let sleep = tokio::time::sleep(Duration::from_millis(0));
|
||||
tokio::pin!(sleep);
|
||||
|
||||
let build_info = common_version::build_info();
|
||||
|
||||
let heartbeat_request = HeartbeatRequest {
|
||||
peer: self_peer,
|
||||
node_epoch,
|
||||
@@ -283,8 +289,13 @@ impl HeartbeatTask {
|
||||
if let Some(message) = message {
|
||||
match outgoing_message_to_mailbox_message(message) {
|
||||
Ok(message) => {
|
||||
let mut extensions = heartbeat_request.extensions.clone();
|
||||
let gc_stat = gc_limiter.gc_stat();
|
||||
gc_stat.into_extensions(&mut extensions);
|
||||
|
||||
let req = HeartbeatRequest {
|
||||
mailbox_message: Some(message),
|
||||
extensions,
|
||||
..heartbeat_request.clone()
|
||||
};
|
||||
HEARTBEAT_RECV_COUNT.with_label_values(&["success"]).inc();
|
||||
@@ -305,10 +316,16 @@ impl HeartbeatTask {
|
||||
let topic_stats = region_server_clone.topic_stats();
|
||||
let now = Instant::now();
|
||||
let duration_since_epoch = (now - epoch).as_millis() as u64;
|
||||
|
||||
let mut extensions = heartbeat_request.extensions.clone();
|
||||
let gc_stat = gc_limiter.gc_stat();
|
||||
gc_stat.into_extensions(&mut extensions);
|
||||
|
||||
let mut req = HeartbeatRequest {
|
||||
region_stats,
|
||||
topic_stats,
|
||||
duration_since_epoch,
|
||||
extensions,
|
||||
..heartbeat_request.clone()
|
||||
};
|
||||
|
||||
|
||||
@@ -20,16 +20,21 @@ use common_meta::heartbeat::handler::{
|
||||
use common_meta::instruction::{Instruction, InstructionReply};
|
||||
use common_telemetry::error;
|
||||
use snafu::OptionExt;
|
||||
use store_api::storage::GcReport;
|
||||
|
||||
mod close_region;
|
||||
mod downgrade_region;
|
||||
mod file_ref;
|
||||
mod flush_region;
|
||||
mod gc_worker;
|
||||
mod open_region;
|
||||
mod upgrade_region;
|
||||
|
||||
use crate::heartbeat::handler::close_region::CloseRegionsHandler;
|
||||
use crate::heartbeat::handler::downgrade_region::DowngradeRegionsHandler;
|
||||
use crate::heartbeat::handler::file_ref::GetFileRefsHandler;
|
||||
use crate::heartbeat::handler::flush_region::FlushRegionsHandler;
|
||||
use crate::heartbeat::handler::gc_worker::GcRegionsHandler;
|
||||
use crate::heartbeat::handler::open_region::OpenRegionsHandler;
|
||||
use crate::heartbeat::handler::upgrade_region::UpgradeRegionsHandler;
|
||||
use crate::heartbeat::task_tracker::TaskTracker;
|
||||
@@ -39,10 +44,10 @@ use crate::region_server::RegionServer;
|
||||
#[derive(Clone)]
|
||||
pub struct RegionHeartbeatResponseHandler {
|
||||
region_server: RegionServer,
|
||||
catchup_tasks: TaskTracker<()>,
|
||||
downgrade_tasks: TaskTracker<()>,
|
||||
flush_tasks: TaskTracker<()>,
|
||||
open_region_parallelism: usize,
|
||||
gc_tasks: TaskTracker<GcReport>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -58,9 +63,9 @@ pub trait InstructionHandler: Send + Sync {
|
||||
#[derive(Clone)]
|
||||
pub struct HandlerContext {
|
||||
region_server: RegionServer,
|
||||
catchup_tasks: TaskTracker<()>,
|
||||
downgrade_tasks: TaskTracker<()>,
|
||||
flush_tasks: TaskTracker<()>,
|
||||
gc_tasks: TaskTracker<GcReport>,
|
||||
}
|
||||
|
||||
impl HandlerContext {
|
||||
@@ -68,9 +73,9 @@ impl HandlerContext {
|
||||
pub fn new_for_test(region_server: RegionServer) -> Self {
|
||||
Self {
|
||||
region_server,
|
||||
catchup_tasks: TaskTracker::new(),
|
||||
downgrade_tasks: TaskTracker::new(),
|
||||
flush_tasks: TaskTracker::new(),
|
||||
gc_tasks: TaskTracker::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -80,11 +85,11 @@ impl RegionHeartbeatResponseHandler {
|
||||
pub fn new(region_server: RegionServer) -> Self {
|
||||
Self {
|
||||
region_server,
|
||||
catchup_tasks: TaskTracker::new(),
|
||||
downgrade_tasks: TaskTracker::new(),
|
||||
flush_tasks: TaskTracker::new(),
|
||||
// Default to half of the number of CPUs.
|
||||
open_region_parallelism: (num_cpus::get() / 2).max(1),
|
||||
gc_tasks: TaskTracker::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,7 +110,14 @@ impl RegionHeartbeatResponseHandler {
|
||||
)),
|
||||
Instruction::FlushRegions(_) => Ok(Box::new(FlushRegionsHandler.into())),
|
||||
Instruction::DowngradeRegions(_) => Ok(Box::new(DowngradeRegionsHandler.into())),
|
||||
Instruction::UpgradeRegion(_) => Ok(Box::new(UpgradeRegionsHandler.into())),
|
||||
Instruction::UpgradeRegions(_) => Ok(Box::new(
|
||||
UpgradeRegionsHandler {
|
||||
upgrade_region_parallelism: self.open_region_parallelism,
|
||||
}
|
||||
.into(),
|
||||
)),
|
||||
Instruction::GetFileRefs(_) => Ok(Box::new(GetFileRefsHandler.into())),
|
||||
Instruction::GcRegions(_) => Ok(Box::new(GcRegionsHandler.into())),
|
||||
Instruction::InvalidateCaches(_) => InvalidHeartbeatResponseSnafu.fail(),
|
||||
}
|
||||
}
|
||||
@@ -118,6 +130,8 @@ pub enum InstructionHandlers {
|
||||
FlushRegions(FlushRegionsHandler),
|
||||
DowngradeRegions(DowngradeRegionsHandler),
|
||||
UpgradeRegions(UpgradeRegionsHandler),
|
||||
GetFileRefs(GetFileRefsHandler),
|
||||
GcRegions(GcRegionsHandler),
|
||||
}
|
||||
|
||||
macro_rules! impl_from_handler {
|
||||
@@ -137,7 +151,9 @@ impl_from_handler!(
|
||||
OpenRegionsHandler => OpenRegions,
|
||||
FlushRegionsHandler => FlushRegions,
|
||||
DowngradeRegionsHandler => DowngradeRegions,
|
||||
UpgradeRegionsHandler => UpgradeRegions
|
||||
UpgradeRegionsHandler => UpgradeRegions,
|
||||
GetFileRefsHandler => GetFileRefs,
|
||||
GcRegionsHandler => GcRegions
|
||||
);
|
||||
|
||||
macro_rules! dispatch_instr {
|
||||
@@ -179,7 +195,9 @@ dispatch_instr!(
|
||||
OpenRegions => OpenRegions,
|
||||
FlushRegions => FlushRegions,
|
||||
DowngradeRegions => DowngradeRegions,
|
||||
UpgradeRegion => UpgradeRegions,
|
||||
UpgradeRegions => UpgradeRegions,
|
||||
GetFileRefs => GetFileRefs,
|
||||
GcRegions => GcRegions,
|
||||
);
|
||||
|
||||
#[async_trait]
|
||||
@@ -199,18 +217,18 @@ impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler {
|
||||
|
||||
let mailbox = ctx.mailbox.clone();
|
||||
let region_server = self.region_server.clone();
|
||||
let catchup_tasks = self.catchup_tasks.clone();
|
||||
let downgrade_tasks = self.downgrade_tasks.clone();
|
||||
let flush_tasks = self.flush_tasks.clone();
|
||||
let gc_tasks = self.gc_tasks.clone();
|
||||
let handler = self.build_handler(&instruction)?;
|
||||
let _handle = common_runtime::spawn_global(async move {
|
||||
let reply = handler
|
||||
.handle(
|
||||
&HandlerContext {
|
||||
region_server,
|
||||
catchup_tasks,
|
||||
downgrade_tasks,
|
||||
flush_tasks,
|
||||
gc_tasks,
|
||||
},
|
||||
instruction,
|
||||
)
|
||||
@@ -315,10 +333,10 @@ mod tests {
|
||||
);
|
||||
|
||||
// Upgrade region
|
||||
let instruction = Instruction::UpgradeRegion(UpgradeRegion {
|
||||
let instruction = Instruction::UpgradeRegions(vec![UpgradeRegion {
|
||||
region_id,
|
||||
..Default::default()
|
||||
});
|
||||
}]);
|
||||
assert!(
|
||||
heartbeat_handler.is_acceptable(&heartbeat_env.create_handler_ctx((meta, instruction)))
|
||||
);
|
||||
|
||||
62 src/datanode/src/heartbeat/handler/file_ref.rs Normal file
@@ -0,0 +1,62 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_meta::instruction::{GetFileRefs, GetFileRefsReply, InstructionReply};
|
||||
use store_api::storage::FileRefsManifest;
|
||||
|
||||
use crate::heartbeat::handler::{HandlerContext, InstructionHandler};
|
||||
|
||||
pub struct GetFileRefsHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl InstructionHandler for GetFileRefsHandler {
|
||||
type Instruction = GetFileRefs;
|
||||
|
||||
async fn handle(
|
||||
&self,
|
||||
ctx: &HandlerContext,
|
||||
get_file_refs: Self::Instruction,
|
||||
) -> Option<InstructionReply> {
|
||||
let region_server = &ctx.region_server;
|
||||
|
||||
// Get the MitoEngine
|
||||
let Some(mito_engine) = region_server.mito_engine() else {
|
||||
return Some(InstructionReply::GetFileRefs(GetFileRefsReply {
|
||||
file_refs_manifest: FileRefsManifest::default(),
|
||||
success: false,
|
||||
error: Some("MitoEngine not found".to_string()),
|
||||
}));
|
||||
};
|
||||
|
||||
match mito_engine
|
||||
.get_snapshot_of_unmanifested_refs(get_file_refs.region_ids)
|
||||
.await
|
||||
{
|
||||
Ok(all_file_refs) => {
|
||||
// Return the file references
|
||||
Some(InstructionReply::GetFileRefs(GetFileRefsReply {
|
||||
file_refs_manifest: all_file_refs,
|
||||
success: true,
|
||||
error: None,
|
||||
}))
|
||||
}
|
||||
Err(e) => Some(InstructionReply::GetFileRefs(GetFileRefsReply {
|
||||
file_refs_manifest: FileRefsManifest::default(),
|
||||
success: false,
|
||||
error: Some(format!("Failed to get file refs: {}", e.output_msg())),
|
||||
})),
|
||||
}
|
||||
}
|
||||
}
|
||||
156 src/datanode/src/heartbeat/handler/gc_worker.rs Normal file
@@ -0,0 +1,156 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_meta::instruction::{GcRegions, GcRegionsReply, InstructionReply};
|
||||
use common_telemetry::{debug, warn};
|
||||
use mito2::gc::LocalGcWorker;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{FileRefsManifest, RegionId};
|
||||
|
||||
use crate::error::{GcMitoEngineSnafu, InvalidGcArgsSnafu, Result, UnexpectedSnafu};
|
||||
use crate::heartbeat::handler::{HandlerContext, InstructionHandler};
|
||||
|
||||
pub struct GcRegionsHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl InstructionHandler for GcRegionsHandler {
|
||||
type Instruction = GcRegions;
|
||||
|
||||
async fn handle(
|
||||
&self,
|
||||
ctx: &HandlerContext,
|
||||
gc_regions: Self::Instruction,
|
||||
) -> Option<InstructionReply> {
|
||||
let region_ids = gc_regions.regions.clone();
|
||||
debug!("Received gc regions instruction: {:?}", region_ids);
|
||||
|
||||
let is_same_table = region_ids.windows(2).all(|w| {
|
||||
let t1 = w[0].table_id();
|
||||
let t2 = w[1].table_id();
|
||||
t1 == t2
|
||||
});
|
||||
if !is_same_table {
|
||||
return Some(InstructionReply::GcRegions(GcRegionsReply {
|
||||
result: Err(format!(
|
||||
"Regions to GC should belong to the same table, found: {:?}",
|
||||
region_ids
|
||||
)),
|
||||
}));
|
||||
}
|
||||
|
||||
let (region_id, gc_worker) = match self
|
||||
.create_gc_worker(
|
||||
ctx,
|
||||
region_ids,
|
||||
&gc_regions.file_refs_manifest,
|
||||
gc_regions.full_file_listing,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(worker) => worker,
|
||||
Err(e) => {
|
||||
return Some(InstructionReply::GcRegions(GcRegionsReply {
|
||||
result: Err(format!("Failed to create GC worker: {}", e)),
|
||||
}));
|
||||
}
|
||||
};
|
||||
|
||||
let register_result = ctx
|
||||
.gc_tasks
|
||||
.try_register(
|
||||
region_id,
|
||||
Box::pin(async move {
|
||||
debug!("Starting gc worker for region {}", region_id);
|
||||
let report = gc_worker
|
||||
.run()
|
||||
.await
|
||||
.context(GcMitoEngineSnafu { region_id })?;
|
||||
debug!("Gc worker for region {} finished", region_id);
|
||||
Ok(report)
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
if register_result.is_busy() {
|
||||
warn!("Another gc task is running for the region: {region_id}");
|
||||
return Some(InstructionReply::GcRegions(GcRegionsReply {
|
||||
result: Err(format!(
|
||||
"Another gc task is running for the region: {region_id}"
|
||||
)),
|
||||
}));
|
||||
}
|
||||
let mut watcher = register_result.into_watcher();
|
||||
let result = ctx.gc_tasks.wait_until_finish(&mut watcher).await;
|
||||
match result {
|
||||
Ok(report) => Some(InstructionReply::GcRegions(GcRegionsReply {
|
||||
result: Ok(report),
|
||||
})),
|
||||
Err(err) => Some(InstructionReply::GcRegions(GcRegionsReply {
|
||||
result: Err(format!("{err:?}")),
|
||||
})),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl GcRegionsHandler {
|
||||
async fn create_gc_worker(
|
||||
&self,
|
||||
ctx: &HandlerContext,
|
||||
mut region_ids: Vec<RegionId>,
|
||||
file_ref_manifest: &FileRefsManifest,
|
||||
full_file_listing: bool,
|
||||
) -> Result<(RegionId, LocalGcWorker)> {
|
||||
// always use the smallest region id on datanode as the target region id
|
||||
region_ids.sort_by_key(|r| r.region_number());
|
||||
let mito_engine = ctx
|
||||
.region_server
|
||||
.mito_engine()
|
||||
.with_context(|| UnexpectedSnafu {
|
||||
violated: "MitoEngine not found".to_string(),
|
||||
})?;
|
||||
let region_id = *region_ids.first().with_context(|| UnexpectedSnafu {
|
||||
violated: "No region ids provided".to_string(),
|
||||
})?;
|
||||
|
||||
let mito_config = mito_engine.mito_config();
|
||||
|
||||
// Find the access layer from one of the regions that exists on this datanode
|
||||
let access_layer = region_ids
|
||||
.iter()
|
||||
.find_map(|rid| mito_engine.find_region(*rid))
|
||||
.with_context(|| InvalidGcArgsSnafu {
|
||||
msg: format!(
|
||||
"None of the regions is on current datanode:{:?}",
|
||||
region_ids
|
||||
),
|
||||
})?
|
||||
.access_layer();
|
||||
|
||||
let cache_manager = mito_engine.cache_manager();
|
||||
|
||||
let gc_worker = LocalGcWorker::try_new(
|
||||
access_layer.clone(),
|
||||
Some(cache_manager),
|
||||
region_ids.into_iter().collect(),
|
||||
Default::default(),
|
||||
mito_config.clone().into(),
|
||||
file_ref_manifest.clone(),
|
||||
&mito_engine.gc_limiter(),
|
||||
full_file_listing,
|
||||
)
|
||||
.await
|
||||
.context(GcMitoEngineSnafu { region_id })?;
|
||||
|
||||
Ok((region_id, gc_worker))
|
||||
}
|
||||
}
|
||||
@@ -12,125 +12,209 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_meta::instruction::{InstructionReply, UpgradeRegion, UpgradeRegionReply};
|
||||
use common_telemetry::{info, warn};
|
||||
use store_api::region_request::{RegionCatchupRequest, RegionRequest, ReplayCheckpoint};
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_meta::instruction::{
|
||||
InstructionReply, UpgradeRegion, UpgradeRegionReply, UpgradeRegionsReply,
|
||||
};
|
||||
use common_telemetry::{debug, info, warn};
|
||||
use store_api::region_request::{RegionCatchupRequest, ReplayCheckpoint};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::heartbeat::handler::{HandlerContext, InstructionHandler};
|
||||
use crate::heartbeat::task_tracker::WaitResult;
|
||||
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub struct UpgradeRegionsHandler;
|
||||
pub struct UpgradeRegionsHandler {
|
||||
pub upgrade_region_parallelism: usize,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl UpgradeRegionsHandler {
|
||||
fn new_test() -> UpgradeRegionsHandler {
|
||||
UpgradeRegionsHandler {
|
||||
upgrade_region_parallelism: 8,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl UpgradeRegionsHandler {
|
||||
fn convert_responses_to_replies(
|
||||
responses: Result<Vec<(RegionId, std::result::Result<(), BoxedError>)>>,
|
||||
catchup_regions: &[RegionId],
|
||||
) -> Vec<UpgradeRegionReply> {
|
||||
match responses {
|
||||
Ok(responses) => responses
|
||||
.into_iter()
|
||||
.map(|(region_id, result)| match result {
|
||||
Ok(()) => UpgradeRegionReply {
|
||||
region_id,
|
||||
ready: true,
|
||||
exists: true,
|
||||
error: None,
|
||||
},
|
||||
Err(err) => {
|
||||
if err.status_code() == StatusCode::RegionNotFound {
|
||||
UpgradeRegionReply {
|
||||
region_id,
|
||||
ready: false,
|
||||
exists: false,
|
||||
error: Some(format!("{err:?}")),
|
||||
}
|
||||
} else {
|
||||
UpgradeRegionReply {
|
||||
region_id,
|
||||
ready: false,
|
||||
exists: true,
|
||||
error: Some(format!("{err:?}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
Err(err) => catchup_regions
|
||||
.iter()
|
||||
.map(|region_id| UpgradeRegionReply {
|
||||
region_id: *region_id,
|
||||
ready: false,
|
||||
exists: true,
|
||||
error: Some(format!("{err:?}")),
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl UpgradeRegionsHandler {
|
||||
// Handles upgrade regions instruction.
|
||||
//
|
||||
// Returns batch of upgrade region replies, the order of the replies is not guaranteed.
|
||||
async fn handle_upgrade_regions(
|
||||
&self,
|
||||
ctx: &HandlerContext,
|
||||
upgrade_regions: Vec<UpgradeRegion>,
|
||||
) -> Vec<UpgradeRegionReply> {
|
||||
let num_upgrade_regions = upgrade_regions.len();
|
||||
let mut replies = Vec::with_capacity(num_upgrade_regions);
|
||||
let mut catchup_requests = Vec::with_capacity(num_upgrade_regions);
|
||||
let mut catchup_regions = Vec::with_capacity(num_upgrade_regions);
|
||||
let mut timeout = None;
|
||||
|
||||
for upgrade_region in upgrade_regions {
|
||||
let Some(writable) = ctx.region_server.is_region_leader(upgrade_region.region_id)
|
||||
else {
|
||||
// Region is not found.
|
||||
debug!("Region {} is not found", upgrade_region.region_id);
|
||||
replies.push(UpgradeRegionReply {
|
||||
region_id: upgrade_region.region_id,
|
||||
ready: false,
|
||||
exists: false,
|
||||
error: None,
|
||||
});
|
||||
continue;
|
||||
};
|
||||
|
||||
// Ignores the catchup requests for writable regions.
|
||||
if writable {
|
||||
warn!(
|
||||
"Region {} is writable, ignores the catchup request",
|
||||
upgrade_region.region_id
|
||||
);
|
||||
replies.push(UpgradeRegionReply {
|
||||
region_id: upgrade_region.region_id,
|
||||
ready: true,
|
||||
exists: true,
|
||||
error: None,
|
||||
});
|
||||
} else {
|
||||
let UpgradeRegion {
|
||||
last_entry_id,
|
||||
metadata_last_entry_id,
|
||||
location_id,
|
||||
replay_entry_id,
|
||||
metadata_replay_entry_id,
|
||||
replay_timeout,
|
||||
..
|
||||
} = upgrade_region;
|
||||
match timeout {
|
||||
Some(timeout) => {
|
||||
debug_assert_eq!(timeout, replay_timeout);
|
||||
}
|
||||
None => {
|
||||
// TODO(weny): required the replay_timeout.
|
||||
timeout = Some(replay_timeout);
|
||||
}
|
||||
}
|
||||
|
||||
let checkpoint = match (replay_entry_id, metadata_replay_entry_id) {
|
||||
(Some(entry_id), metadata_entry_id) => Some(ReplayCheckpoint {
|
||||
entry_id,
|
||||
metadata_entry_id,
|
||||
}),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
catchup_regions.push(upgrade_region.region_id);
|
||||
catchup_requests.push((
|
||||
upgrade_region.region_id,
|
||||
RegionCatchupRequest {
|
||||
set_writable: true,
|
||||
entry_id: last_entry_id,
|
||||
metadata_entry_id: metadata_last_entry_id,
|
||||
location_id,
|
||||
checkpoint,
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
let Some(timeout) = timeout else {
|
||||
// No replay timeout, so we don't need to catchup the regions.
|
||||
info!("All regions are writable, no need to catchup");
|
||||
debug_assert_eq!(replies.len(), num_upgrade_regions);
|
||||
return replies;
|
||||
};
|
||||
|
||||
match tokio::time::timeout(
|
||||
timeout,
|
||||
ctx.region_server
|
||||
.handle_batch_catchup_requests(self.upgrade_region_parallelism, catchup_requests),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(responses) => {
|
||||
replies.extend(
|
||||
Self::convert_responses_to_replies(responses, &catchup_regions).into_iter(),
|
||||
);
|
||||
}
|
||||
Err(_) => {
|
||||
replies.extend(catchup_regions.iter().map(|region_id| UpgradeRegionReply {
|
||||
region_id: *region_id,
|
||||
ready: false,
|
||||
exists: true,
|
||||
error: None,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
replies
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl InstructionHandler for UpgradeRegionsHandler {
|
||||
type Instruction = UpgradeRegion;
|
||||
type Instruction = Vec<UpgradeRegion>;
|
||||
|
||||
async fn handle(
|
||||
&self,
|
||||
ctx: &HandlerContext,
|
||||
UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id,
|
||||
metadata_last_entry_id,
|
||||
replay_timeout,
|
||||
location_id,
|
||||
replay_entry_id,
|
||||
metadata_replay_entry_id,
|
||||
}: UpgradeRegion,
|
||||
upgrade_regions: Self::Instruction,
|
||||
) -> Option<InstructionReply> {
|
||||
let Some(writable) = ctx.region_server.is_region_leader(region_id) else {
|
||||
return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
|
||||
ready: false,
|
||||
exists: false,
|
||||
error: None,
|
||||
}));
|
||||
};
|
||||
let replies = self.handle_upgrade_regions(ctx, upgrade_regions).await;
|
||||
|
||||
if writable {
|
||||
return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
|
||||
ready: true,
|
||||
exists: true,
|
||||
error: None,
|
||||
}));
|
||||
}
|
||||
|
||||
let region_server_moved = ctx.region_server.clone();
|
||||
|
||||
let checkpoint = match (replay_entry_id, metadata_replay_entry_id) {
|
||||
(Some(entry_id), metadata_entry_id) => Some(ReplayCheckpoint {
|
||||
entry_id,
|
||||
metadata_entry_id,
|
||||
}),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
// The catchup task is almost zero cost if the inside region is writable.
|
||||
// Therefore, it always registers a new catchup task.
|
||||
let register_result = ctx
|
||||
.catchup_tasks
|
||||
.try_register(
|
||||
region_id,
|
||||
Box::pin(async move {
|
||||
info!(
|
||||
"Executing region: {region_id} catchup to: last entry id {last_entry_id:?}"
|
||||
);
|
||||
region_server_moved
|
||||
.handle_request(
|
||||
region_id,
|
||||
RegionRequest::Catchup(RegionCatchupRequest {
|
||||
set_writable: true,
|
||||
entry_id: last_entry_id,
|
||||
metadata_entry_id: metadata_last_entry_id,
|
||||
location_id,
|
||||
checkpoint,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
if register_result.is_busy() {
|
||||
warn!("Another catchup task is running for the region: {region_id}");
|
||||
}
|
||||
|
||||
// Returns immediately
|
||||
let Some(replay_timeout) = replay_timeout else {
|
||||
return Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
|
||||
ready: false,
|
||||
exists: true,
|
||||
error: None,
|
||||
}));
|
||||
};
|
||||
|
||||
// We don't care that it returns a newly registered or running task.
|
||||
let mut watcher = register_result.into_watcher();
|
||||
let result = ctx.catchup_tasks.wait(&mut watcher, replay_timeout).await;
|
||||
|
||||
match result {
|
||||
WaitResult::Timeout => Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
|
||||
ready: false,
|
||||
exists: true,
|
||||
error: None,
|
||||
})),
|
||||
WaitResult::Finish(Ok(_)) => {
|
||||
Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
|
||||
ready: true,
|
||||
exists: true,
|
||||
error: None,
|
||||
}))
|
||||
}
|
||||
WaitResult::Finish(Err(err)) => {
|
||||
Some(InstructionReply::UpgradeRegion(UpgradeRegionReply {
|
||||
ready: false,
|
||||
exists: true,
|
||||
error: Some(format!("{err:?}")),
|
||||
}))
|
||||
}
|
||||
}
|
||||
Some(InstructionReply::UpgradeRegions(UpgradeRegionsReply::new(
|
||||
replies,
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,7 +226,6 @@ mod tests {
|
||||
use mito2::engine::MITO_ENGINE_NAME;
|
||||
use store_api::region_engine::RegionRole;
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::time::Instant;
|
||||
|
||||
use crate::error;
|
||||
use crate::heartbeat::handler::upgrade_region::UpgradeRegionsHandler;
|
||||
@@ -158,21 +241,30 @@ mod tests {
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
let waits = vec![None, Some(Duration::from_millis(100u64))];
|
||||
|
||||
for replay_timeout in waits {
|
||||
let reply = UpgradeRegionsHandler
|
||||
.handle(
|
||||
&handler_context,
|
||||
let region_id2 = RegionId::new(1024, 2);
|
||||
let replay_timeout = Duration::from_millis(100u64);
|
||||
let reply = UpgradeRegionsHandler::new_test()
|
||||
.handle(
|
||||
&handler_context,
|
||||
vec![
|
||||
UpgradeRegion {
|
||||
region_id,
|
||||
replay_timeout,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.await;
|
||||
UpgradeRegion {
|
||||
region_id: region_id2,
|
||||
replay_timeout,
|
||||
..Default::default()
|
||||
},
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let reply = reply.unwrap().expect_upgrade_region_reply();
|
||||
let replies = &reply.unwrap().expect_upgrade_regions_reply();
|
||||
assert_eq!(replies[0].region_id, region_id);
|
||||
assert_eq!(replies[1].region_id, region_id2);
|
||||
for reply in replies {
|
||||
assert!(!reply.exists);
|
||||
assert!(reply.error.is_none());
|
||||
}
|
||||
@@ -182,6 +274,7 @@ mod tests {
|
||||
async fn test_region_writable() {
|
||||
let mock_region_server = mock_region_server();
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
let region_id2 = RegionId::new(1024, 2);
|
||||
|
||||
let (mock_engine, _) =
|
||||
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
|
||||
@@ -191,25 +284,32 @@ mod tests {
|
||||
unreachable!();
|
||||
}));
|
||||
});
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
|
||||
mock_region_server.register_test_region(region_id, mock_engine.clone());
|
||||
mock_region_server.register_test_region(region_id2, mock_engine);
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
let waits = vec![None, Some(Duration::from_millis(100u64))];
|
||||
|
||||
for replay_timeout in waits {
|
||||
let reply = UpgradeRegionsHandler
|
||||
.handle(
|
||||
&handler_context,
|
||||
let replay_timeout = Duration::from_millis(100u64);
|
||||
let reply = UpgradeRegionsHandler::new_test()
|
||||
.handle(
|
||||
&handler_context,
|
||||
vec![
|
||||
UpgradeRegion {
|
||||
region_id,
|
||||
replay_timeout,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.await;
|
||||
UpgradeRegion {
|
||||
region_id: region_id2,
|
||||
replay_timeout,
|
||||
..Default::default()
|
||||
},
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let reply = reply.unwrap().expect_upgrade_region_reply();
|
||||
let replies = &reply.unwrap().expect_upgrade_regions_reply();
|
||||
assert_eq!(replies[0].region_id, region_id);
|
||||
assert_eq!(replies[1].region_id, region_id2);
|
||||
for reply in replies {
|
||||
assert!(reply.ready);
|
||||
assert!(reply.exists);
|
||||
assert!(reply.error.is_none());
|
||||
@@ -232,30 +332,27 @@ mod tests {
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
let replay_timeout = Duration::from_millis(100u64);
|
||||
let reply = UpgradeRegionsHandler::new_test()
|
||||
.handle(
|
||||
&handler_context,
|
||||
vec![UpgradeRegion {
|
||||
region_id,
|
||||
replay_timeout,
|
||||
..Default::default()
|
||||
}],
|
||||
)
|
||||
.await;
|
||||
|
||||
let waits = vec![None, Some(Duration::from_millis(100u64))];
|
||||
|
||||
for replay_timeout in waits {
|
||||
let reply = UpgradeRegionsHandler
|
||||
.handle(
|
||||
&handler_context,
|
||||
UpgradeRegion {
|
||||
region_id,
|
||||
replay_timeout,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
let reply = reply.unwrap().expect_upgrade_region_reply();
|
||||
assert!(!reply.ready);
|
||||
assert!(reply.exists);
|
||||
assert!(reply.error.is_none());
|
||||
}
|
||||
let reply = &reply.unwrap().expect_upgrade_regions_reply()[0];
|
||||
assert!(!reply.ready);
|
||||
assert!(reply.exists);
|
||||
assert!(reply.error.is_none(), "error: {:?}", reply.error);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_not_ready_with_retry() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let mock_region_server = mock_region_server();
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
|
||||
@@ -264,58 +361,48 @@ mod tests {
|
||||
// Region is not ready.
|
||||
region_engine.mock_role = Some(Some(RegionRole::Follower));
|
||||
region_engine.handle_request_mock_fn = Some(Box::new(|_, _| Ok(0)));
|
||||
// Note: Don't change.
|
||||
region_engine.handle_request_delay = Some(Duration::from_millis(300));
|
||||
});
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
|
||||
let waits = vec![
|
||||
Some(Duration::from_millis(100u64)),
|
||||
Some(Duration::from_millis(100u64)),
|
||||
];
|
||||
|
||||
let waits = vec![Duration::from_millis(100u64), Duration::from_millis(100u64)];
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
for replay_timeout in waits {
|
||||
let reply = UpgradeRegionsHandler
|
||||
let reply = UpgradeRegionsHandler::new_test()
|
||||
.handle(
|
||||
&handler_context,
|
||||
UpgradeRegion {
|
||||
vec![UpgradeRegion {
|
||||
region_id,
|
||||
replay_timeout,
|
||||
..Default::default()
|
||||
},
|
||||
}],
|
||||
)
|
||||
.await;
|
||||
|
||||
let reply = reply.unwrap().expect_upgrade_region_reply();
|
||||
let reply = &reply.unwrap().expect_upgrade_regions_reply()[0];
|
||||
assert!(!reply.ready);
|
||||
assert!(reply.exists);
|
||||
assert!(reply.error.is_none());
|
||||
assert!(reply.error.is_none(), "error: {:?}", reply.error);
|
||||
}
|
||||
|
||||
let timer = Instant::now();
|
||||
let reply = UpgradeRegionsHandler
|
||||
let reply = UpgradeRegionsHandler::new_test()
|
||||
.handle(
|
||||
&handler_context,
|
||||
UpgradeRegion {
|
||||
vec![UpgradeRegion {
|
||||
region_id,
|
||||
replay_timeout: Some(Duration::from_millis(500)),
|
||||
replay_timeout: Duration::from_millis(500),
|
||||
..Default::default()
|
||||
},
|
||||
}],
|
||||
)
|
||||
.await;
|
||||
// Must less than 300 ms.
|
||||
assert!(timer.elapsed().as_millis() < 300);
|
||||
|
||||
let reply = reply.unwrap().expect_upgrade_region_reply();
|
||||
let reply = &reply.unwrap().expect_upgrade_regions_reply()[0];
|
||||
assert!(reply.ready);
|
||||
assert!(reply.exists);
|
||||
assert!(reply.error.is_none());
|
||||
assert!(reply.error.is_none(), "error: {:?}", reply.error);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_error() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let mock_region_server = mock_region_server();
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
|
||||
@@ -335,38 +422,37 @@ mod tests {
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
let reply = UpgradeRegionsHandler
|
||||
let reply = UpgradeRegionsHandler::new_test()
|
||||
.handle(
|
||||
&handler_context,
|
||||
UpgradeRegion {
|
||||
vec![UpgradeRegion {
|
||||
region_id,
|
||||
..Default::default()
|
||||
},
|
||||
}],
|
||||
)
|
||||
.await;
|
||||
|
||||
// It didn't wait for handle returns; it had no idea about the error.
|
||||
let reply = reply.unwrap().expect_upgrade_region_reply();
|
||||
let reply = &reply.unwrap().expect_upgrade_regions_reply()[0];
|
||||
assert!(!reply.ready);
|
||||
assert!(reply.exists);
|
||||
assert!(reply.error.is_none());
|
||||
|
||||
let reply = UpgradeRegionsHandler
|
||||
let reply = UpgradeRegionsHandler::new_test()
|
||||
.handle(
|
||||
&handler_context,
|
||||
UpgradeRegion {
|
||||
vec![UpgradeRegion {
|
||||
region_id,
|
||||
replay_timeout: Some(Duration::from_millis(200)),
|
||||
replay_timeout: Duration::from_millis(200),
|
||||
..Default::default()
|
||||
},
|
||||
}],
|
||||
)
|
||||
.await;
|
||||
|
||||
let reply = reply.unwrap().expect_upgrade_region_reply();
|
||||
let reply = &reply.unwrap().expect_upgrade_regions_reply()[0];
|
||||
assert!(!reply.ready);
|
||||
assert!(reply.exists);
|
||||
assert!(reply.error.is_some());
|
||||
assert!(reply.error.unwrap().contains("mock_error"));
|
||||
assert!(reply.error.as_ref().unwrap().contains("mock_error"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,4 +75,20 @@ lazy_static! {
|
||||
&[RESULT_TYPE]
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
/// Total count of failed region server requests.
|
||||
pub static ref REGION_SERVER_REQUEST_FAILURE_COUNT: IntCounterVec = register_int_counter_vec!(
|
||||
"greptime_datanode_region_request_fail_count",
|
||||
"failed region server requests count",
|
||||
&[REGION_REQUEST_TYPE]
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
/// Total count of failed insert requests to region server.
|
||||
pub static ref REGION_SERVER_INSERT_FAIL_COUNT: IntCounterVec = register_int_counter_vec!(
|
||||
"greptime_datanode_region_failed_insert_count",
|
||||
"failed region server insert requests count",
|
||||
&[REGION_REQUEST_TYPE]
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
@@ -66,7 +66,8 @@ use store_api::region_engine::{
|
||||
SettableRegionRoleState,
|
||||
};
|
||||
use store_api::region_request::{
|
||||
AffectedRows, BatchRegionDdlRequest, RegionCloseRequest, RegionOpenRequest, RegionRequest,
|
||||
AffectedRows, BatchRegionDdlRequest, RegionCatchupRequest, RegionCloseRequest,
|
||||
RegionOpenRequest, RegionRequest,
|
||||
};
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::sync::{Semaphore, SemaphorePermit};
|
||||
@@ -158,6 +159,27 @@ impl RegionServer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the MitoEngine if it's registered.
|
||||
pub fn mito_engine(&self) -> Option<MitoEngine> {
|
||||
if let Some(mito) = self.inner.mito_engine.read().unwrap().clone() {
|
||||
Some(mito)
|
||||
} else {
|
||||
self.inner
|
||||
.engines
|
||||
.read()
|
||||
.unwrap()
|
||||
.get(MITO_ENGINE_NAME)
|
||||
.cloned()
|
||||
.and_then(|e| {
|
||||
let mito = e.as_any().downcast_ref::<MitoEngine>().cloned();
|
||||
if mito.is_none() {
|
||||
warn!("Mito engine not found in region server engines");
|
||||
}
|
||||
mito
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn handle_batch_open_requests(
|
||||
&self,
|
||||
@@ -170,6 +192,17 @@ impl RegionServer {
|
||||
.await
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn handle_batch_catchup_requests(
|
||||
&self,
|
||||
parallelism: usize,
|
||||
requests: Vec<(RegionId, RegionCatchupRequest)>,
|
||||
) -> Result<Vec<(RegionId, std::result::Result<(), BoxedError>)>> {
|
||||
self.inner
|
||||
.handle_batch_catchup_requests(parallelism, requests)
|
||||
.await
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, fields(request_type = request.request_type()))]
|
||||
pub async fn handle_request(
|
||||
&self,
|
||||
@@ -378,6 +411,14 @@ impl RegionServer {
|
||||
#[cfg(test)]
|
||||
/// Registers a region for test purpose.
|
||||
pub(crate) fn register_test_region(&self, region_id: RegionId, engine: RegionEngineRef) {
|
||||
{
|
||||
let mut engines = self.inner.engines.write().unwrap();
|
||||
if !engines.contains_key(engine.name()) {
|
||||
debug!("Registering test engine: {}", engine.name());
|
||||
engines.insert(engine.name().to_string(), engine.clone());
|
||||
}
|
||||
}
|
||||
|
||||
self.inner
|
||||
.region_map
|
||||
.insert(region_id, RegionEngineWithStatus::Ready(engine));
|
||||
@@ -559,6 +600,8 @@ impl RegionServer {
|
||||
#[async_trait]
|
||||
impl RegionServerHandler for RegionServer {
|
||||
async fn handle(&self, request: region_request::Body) -> ServerResult<RegionResponseV1> {
|
||||
let failed_requests_cnt = crate::metrics::REGION_SERVER_REQUEST_FAILURE_COUNT
|
||||
.with_label_values(&[request.as_ref()]);
|
||||
let response = match &request {
|
||||
region_request::Body::Creates(_)
|
||||
| region_request::Body::Drops(_)
|
||||
@@ -576,6 +619,9 @@ impl RegionServerHandler for RegionServer {
|
||||
_ => self.handle_requests_in_serial(request).await,
|
||||
}
|
||||
.map_err(BoxedError::new)
|
||||
.inspect_err(|_| {
|
||||
failed_requests_cnt.inc();
|
||||
})
|
||||
.context(ExecuteGrpcRequestSnafu)?;
|
||||
|
||||
Ok(RegionResponseV1 {
|
||||
@@ -676,14 +722,14 @@ struct RegionServerInner {
|
||||
runtime: Runtime,
|
||||
event_listener: RegionServerEventListenerRef,
|
||||
table_provider_factory: TableProviderFactoryRef,
|
||||
// The number of queries allowed to be executed at the same time.
|
||||
// Act as last line of defense on datanode to prevent query overloading.
|
||||
/// The number of queries allowed to be executed at the same time.
|
||||
/// Act as last line of defense on datanode to prevent query overloading.
|
||||
parallelism: Option<RegionServerParallelism>,
|
||||
// The topic stats reporter.
|
||||
/// The topic stats reporter.
|
||||
topic_stats_reporter: RwLock<Option<Box<dyn TopicStatsReporter>>>,
|
||||
// HACK(zhongzc): Direct MitoEngine handle for diagnostics. This couples the
|
||||
// server with a concrete engine; acceptable for now to fetch Mito-specific
|
||||
// info (e.g., list SSTs). Consider a diagnostics trait later.
|
||||
/// HACK(zhongzc): Direct MitoEngine handle for diagnostics. This couples the
|
||||
/// server with a concrete engine; acceptable for now to fetch Mito-specific
|
||||
/// info (e.g., list SSTs). Consider a diagnostics trait later.
|
||||
mito_engine: RwLock<Option<MitoEngine>>,
|
||||
}
|
||||
|
||||
@@ -951,6 +997,116 @@ impl RegionServerInner {
|
||||
.collect::<Vec<_>>())
|
||||
}
|
||||
|
||||
pub async fn handle_batch_catchup_requests_inner(
|
||||
&self,
|
||||
engine: RegionEngineRef,
|
||||
parallelism: usize,
|
||||
requests: Vec<(RegionId, RegionCatchupRequest)>,
|
||||
) -> Result<Vec<(RegionId, std::result::Result<(), BoxedError>)>> {
|
||||
for (region_id, _) in &requests {
|
||||
self.set_region_status_not_ready(*region_id, &engine, &RegionChange::Catchup);
|
||||
}
|
||||
let region_ids = requests
|
||||
.iter()
|
||||
.map(|(region_id, _)| *region_id)
|
||||
.collect::<Vec<_>>();
|
||||
let mut responses = Vec::with_capacity(requests.len());
|
||||
match engine
|
||||
.handle_batch_catchup_requests(parallelism, requests)
|
||||
.await
|
||||
{
|
||||
Ok(results) => {
|
||||
for (region_id, result) in results {
|
||||
match result {
|
||||
Ok(_) => {
|
||||
if let Err(e) = self
|
||||
.set_region_status_ready(
|
||||
region_id,
|
||||
engine.clone(),
|
||||
RegionChange::Catchup,
|
||||
)
|
||||
.await
|
||||
{
|
||||
error!(e; "Failed to set region to ready: {}", region_id);
|
||||
responses.push((region_id, Err(BoxedError::new(e))));
|
||||
} else {
|
||||
responses.push((region_id, Ok(())));
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
self.unset_region_status(region_id, &engine, RegionChange::Catchup);
|
||||
error!(e; "Failed to catchup region: {}", region_id);
|
||||
responses.push((region_id, Err(e)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
for region_id in region_ids {
|
||||
self.unset_region_status(region_id, &engine, RegionChange::Catchup);
|
||||
}
|
||||
error!(e; "Failed to catchup batch regions");
|
||||
return error::UnexpectedSnafu {
|
||||
violated: format!("Failed to catchup batch regions: {:?}", e),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(responses)
|
||||
}
|
||||
|
||||
pub async fn handle_batch_catchup_requests(
|
||||
&self,
|
||||
parallelism: usize,
|
||||
requests: Vec<(RegionId, RegionCatchupRequest)>,
|
||||
) -> Result<Vec<(RegionId, std::result::Result<(), BoxedError>)>> {
|
||||
let mut engine_grouped_requests: HashMap<String, Vec<_>> = HashMap::new();
|
||||
|
||||
let mut responses = Vec::with_capacity(requests.len());
|
||||
for (region_id, request) in requests {
|
||||
if let Ok(engine) = self.get_engine(region_id, &RegionChange::Catchup) {
|
||||
match engine {
|
||||
CurrentEngine::Engine(engine) => {
|
||||
engine_grouped_requests
|
||||
.entry(engine.name().to_string())
|
||||
.or_default()
|
||||
.push((region_id, request));
|
||||
}
|
||||
CurrentEngine::EarlyReturn(_) => {
|
||||
return error::UnexpectedSnafu {
|
||||
violated: format!("Unexpected engine type for region {}", region_id),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
responses.push((
|
||||
region_id,
|
||||
Err(BoxedError::new(
|
||||
error::RegionNotFoundSnafu { region_id }.build(),
|
||||
)),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
for (engine, requests) in engine_grouped_requests {
|
||||
let engine = self
|
||||
.engines
|
||||
.read()
|
||||
.unwrap()
|
||||
.get(&engine)
|
||||
.with_context(|| RegionEngineNotFoundSnafu { name: &engine })?
|
||||
.clone();
|
||||
responses.extend(
|
||||
self.handle_batch_catchup_requests_inner(engine, parallelism, requests)
|
||||
.await?,
|
||||
);
|
||||
}
|
||||
|
||||
Ok(responses)
|
||||
}
|
||||
|
||||
// Handle requests in batch.
|
||||
//
|
||||
// limitation: all create requests must be in the same engine.
|
||||
@@ -1079,6 +1235,11 @@ impl RegionServerInner {
|
||||
})
|
||||
}
|
||||
Err(err) => {
|
||||
if matches!(region_change, RegionChange::Ingest) {
|
||||
crate::metrics::REGION_SERVER_INSERT_FAIL_COUNT
|
||||
.with_label_values(&[request_type])
|
||||
.inc();
|
||||
}
|
||||
// Removes the region status if the operation fails.
|
||||
self.unset_region_status(region_id, &engine, region_change);
|
||||
Err(err)
|
||||
|
||||
@@ -47,10 +47,7 @@ pub(crate) async fn new_object_store_without_cache(
|
||||
Ok(object_store)
|
||||
}
|
||||
|
||||
pub(crate) async fn new_object_store(
|
||||
store: ObjectStoreConfig,
|
||||
data_home: &str,
|
||||
) -> Result<ObjectStore> {
|
||||
pub async fn new_object_store(store: ObjectStoreConfig, data_home: &str) -> Result<ObjectStore> {
|
||||
let object_store = new_raw_object_store(&store, data_home)
|
||||
.await
|
||||
.context(error::ObjectStoreSnafu)?;
|
||||
@@ -59,7 +56,7 @@ pub(crate) async fn new_object_store(
|
||||
let object_store = {
|
||||
// It's safe to unwrap here because we already checked above.
|
||||
let cache_config = store.cache_config().unwrap();
|
||||
if let Some(cache_layer) = build_cache_layer(cache_config).await? {
|
||||
if let Some(cache_layer) = build_cache_layer(cache_config, data_home).await? {
|
||||
// Adds cache layer
|
||||
object_store.layer(cache_layer)
|
||||
} else {
|
||||
@@ -79,17 +76,22 @@ pub(crate) async fn new_object_store(
|
||||
|
||||
async fn build_cache_layer(
|
||||
cache_config: &ObjectStorageCacheConfig,
|
||||
data_home: &str,
|
||||
) -> Result<Option<LruCacheLayer<impl Access>>> {
|
||||
// No need to build cache layer if read cache is disabled.
|
||||
if !cache_config.enable_read_cache {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let atomic_temp_dir = join_dir(&cache_config.cache_path, ATOMIC_WRITE_DIR);
|
||||
let cache_base_dir = if cache_config.cache_path.is_empty() {
|
||||
data_home
|
||||
} else {
|
||||
&cache_config.cache_path
|
||||
};
|
||||
let atomic_temp_dir = join_dir(cache_base_dir, ATOMIC_WRITE_DIR);
|
||||
clean_temp_dir(&atomic_temp_dir).context(error::ObjectStoreSnafu)?;
|
||||
|
||||
let cache_store = Fs::default()
|
||||
.root(&cache_config.cache_path)
|
||||
.root(cache_base_dir)
|
||||
.atomic_write_dir(&atomic_temp_dir)
|
||||
.build()
|
||||
.context(error::BuildCacheStoreSnafu)?;
|
||||
|
||||
@@ -277,6 +277,10 @@ impl ConcreteDataType {
|
||||
matches!(self, ConcreteDataType::Null(NullType))
|
||||
}
|
||||
|
||||
pub(crate) fn is_struct(&self) -> bool {
|
||||
matches!(self, ConcreteDataType::Struct(_))
|
||||
}
|
||||
|
||||
/// Try to cast the type as a [`ListType`].
|
||||
pub fn as_list(&self) -> Option<&ListType> {
|
||||
match self {
|
||||
|
||||
@@ -266,6 +266,14 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse or serialize arrow metadata"))]
|
||||
ArrowMetadata {
|
||||
#[snafu(source)]
|
||||
error: arrow::error::ArrowError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
@@ -307,7 +315,8 @@ impl ErrorExt for Error {
|
||||
| ConvertArrowArrayToScalars { .. }
|
||||
| ConvertScalarToArrowArray { .. }
|
||||
| ParseExtendedType { .. }
|
||||
| InconsistentStructFieldsAndItems { .. } => StatusCode::Internal,
|
||||
| InconsistentStructFieldsAndItems { .. }
|
||||
| ArrowMetadata { .. } => StatusCode::Internal,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
15
src/datatypes/src/extension.rs
Normal file
15
src/datatypes/src/extension.rs
Normal file
@@ -0,0 +1,15 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod json;
|
||||
104
src/datatypes/src/extension/json.rs
Normal file
104
src/datatypes/src/extension/json.rs
Normal file
@@ -0,0 +1,104 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_schema::extension::ExtensionType;
|
||||
use arrow_schema::{ArrowError, DataType};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::json::JsonStructureSettings;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct JsonMetadata {
|
||||
/// Indicates how to handle JSON is stored in underlying data type
|
||||
///
|
||||
/// This field can be `None` for data is converted to complete structured in-memory form.
|
||||
pub json_structure_settings: Option<JsonStructureSettings>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct JsonExtensionType(Arc<JsonMetadata>);
|
||||
|
||||
impl JsonExtensionType {
|
||||
pub fn new(metadata: Arc<JsonMetadata>) -> Self {
|
||||
JsonExtensionType(metadata)
|
||||
}
|
||||
}
|
||||
|
||||
impl ExtensionType for JsonExtensionType {
|
||||
const NAME: &'static str = "greptime.json";
|
||||
type Metadata = Arc<JsonMetadata>;
|
||||
|
||||
fn metadata(&self) -> &Self::Metadata {
|
||||
&self.0
|
||||
}
|
||||
|
||||
fn serialize_metadata(&self) -> Option<String> {
|
||||
serde_json::to_string(self.metadata()).ok()
|
||||
}
|
||||
|
||||
fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
|
||||
if let Some(metadata) = metadata {
|
||||
let metadata = serde_json::from_str(metadata).map_err(|e| {
|
||||
ArrowError::ParseError(format!("Failed to deserialize JSON metadata: {}", e))
|
||||
})?;
|
||||
Ok(Arc::new(metadata))
|
||||
} else {
|
||||
Ok(Arc::new(JsonMetadata::default()))
|
||||
}
|
||||
}
|
||||
|
||||
fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
|
||||
match data_type {
|
||||
// object
|
||||
DataType::Struct(_)
|
||||
// array
|
||||
| DataType::List(_)
|
||||
| DataType::ListView(_)
|
||||
| DataType::LargeList(_)
|
||||
| DataType::LargeListView(_)
|
||||
// string
|
||||
| DataType::Utf8
|
||||
| DataType::Utf8View
|
||||
| DataType::LargeUtf8
|
||||
// number
|
||||
| DataType::Int8
|
||||
| DataType::Int16
|
||||
| DataType::Int32
|
||||
| DataType::Int64
|
||||
| DataType::UInt8
|
||||
| DataType::UInt16
|
||||
| DataType::UInt32
|
||||
| DataType::UInt64
|
||||
| DataType::Float32
|
||||
| DataType::Float64
|
||||
// boolean
|
||||
| DataType::Boolean
|
||||
// null
|
||||
| DataType::Null
|
||||
// legacy json type
|
||||
| DataType::Binary => Ok(()),
|
||||
dt => Err(ArrowError::SchemaError(format!(
|
||||
"Unexpected data type {dt}"
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
|
||||
let json = Self(metadata);
|
||||
json.supports_data_type(data_type)?;
|
||||
Ok(json)
|
||||
}
|
||||
}
|
||||
@@ -13,11 +13,13 @@
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(assert_matches)]
|
||||
#![feature(box_patterns)]
|
||||
|
||||
pub mod arrow_array;
|
||||
pub mod data_type;
|
||||
pub mod duration;
|
||||
pub mod error;
|
||||
pub mod extension;
|
||||
pub mod interval;
|
||||
pub mod json;
|
||||
pub mod macros;
|
||||
|
||||
@@ -32,9 +32,8 @@ pub use crate::schema::column_schema::{
|
||||
COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE, COLUMN_FULLTEXT_OPT_KEY_GRANULARITY,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, ColumnExtType, ColumnSchema, FULLTEXT_KEY,
|
||||
FulltextAnalyzer, FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY,
|
||||
JSON_STRUCTURE_SETTINGS_KEY, Metadata, SKIPPING_INDEX_KEY, SkippingIndexOptions,
|
||||
SkippingIndexType, TIME_INDEX_KEY,
|
||||
FulltextAnalyzer, FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, Metadata,
|
||||
SKIPPING_INDEX_KEY, SkippingIndexOptions, SkippingIndexType, TIME_INDEX_KEY,
|
||||
};
|
||||
pub use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
pub use crate::schema::raw::RawSchema;
|
||||
|
||||
@@ -17,13 +17,17 @@ use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
use arrow::datatypes::Field;
|
||||
use arrow_schema::extension::{
|
||||
EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ResultExt, ensure};
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Error, InvalidFulltextOptionSnafu, ParseExtendedTypeSnafu, Result};
|
||||
use crate::json::JsonStructureSettings;
|
||||
use crate::error::{
|
||||
self, ArrowMetadataSnafu, Error, InvalidFulltextOptionSnafu, ParseExtendedTypeSnafu, Result,
|
||||
};
|
||||
use crate::schema::TYPE_KEY;
|
||||
use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
use crate::value::Value;
|
||||
@@ -42,7 +46,6 @@ pub const FULLTEXT_KEY: &str = "greptime:fulltext";
|
||||
pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index";
|
||||
/// Key used to store skip options in arrow field's metadata.
|
||||
pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index";
|
||||
pub const JSON_STRUCTURE_SETTINGS_KEY: &str = "greptime:json:structure_settings";
|
||||
|
||||
/// Keys used in fulltext options
|
||||
pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable";
|
||||
@@ -394,18 +397,38 @@ impl ColumnSchema {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn json_structure_settings(&self) -> Result<Option<JsonStructureSettings>> {
|
||||
self.metadata
|
||||
.get(JSON_STRUCTURE_SETTINGS_KEY)
|
||||
.map(|json| serde_json::from_str(json).context(error::DeserializeSnafu { json }))
|
||||
.transpose()
|
||||
pub fn extension_type<E>(&self) -> Result<Option<E>>
|
||||
where
|
||||
E: ExtensionType,
|
||||
{
|
||||
let extension_type_name = self.metadata.get(EXTENSION_TYPE_NAME_KEY);
|
||||
|
||||
if extension_type_name.map(|s| s.as_str()) == Some(E::NAME) {
|
||||
let extension_metadata = self.metadata.get(EXTENSION_TYPE_METADATA_KEY);
|
||||
let extension_metadata =
|
||||
E::deserialize_metadata(extension_metadata.map(|s| s.as_str()))
|
||||
.context(ArrowMetadataSnafu)?;
|
||||
|
||||
let extension = E::try_new(&self.data_type.as_arrow_type(), extension_metadata)
|
||||
.context(ArrowMetadataSnafu)?;
|
||||
Ok(Some(extension))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_json_structure_settings(&mut self, settings: &JsonStructureSettings) -> Result<()> {
|
||||
self.metadata.insert(
|
||||
JSON_STRUCTURE_SETTINGS_KEY.to_string(),
|
||||
serde_json::to_string(settings).context(error::SerializeSnafu)?,
|
||||
);
|
||||
pub fn with_extension_type<E>(&mut self, extension_type: &E) -> Result<()>
|
||||
where
|
||||
E: ExtensionType,
|
||||
{
|
||||
self.metadata
|
||||
.insert(EXTENSION_TYPE_NAME_KEY.to_string(), E::NAME.to_string());
|
||||
|
||||
if let Some(extension_metadata) = extension_type.serialize_metadata() {
|
||||
self.metadata
|
||||
.insert(EXTENSION_TYPE_METADATA_KEY.to_string(), extension_metadata);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -31,9 +31,12 @@ use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::{ListType, StructField, StructType};
|
||||
use crate::value::Value;
|
||||
use crate::vectors::json::builder::JsonVectorBuilder;
|
||||
use crate::vectors::{BinaryVectorBuilder, MutableVector};
|
||||
|
||||
pub const JSON_TYPE_NAME: &str = "Json";
|
||||
const JSON_PLAIN_FIELD_NAME: &str = "__plain__";
|
||||
const JSON_PLAIN_FIELD_METADATA_KEY: &str = "is_plain_json";
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
|
||||
pub enum JsonFormat {
|
||||
@@ -54,28 +57,46 @@ impl JsonType {
|
||||
Self { format }
|
||||
}
|
||||
|
||||
// TODO(LFC): remove "allow unused"
|
||||
#[allow(unused)]
|
||||
pub(crate) fn empty() -> Self {
|
||||
Self {
|
||||
format: JsonFormat::Native(Box::new(ConcreteDataType::null_datatype())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Make json type a struct type, by:
|
||||
/// - if the json is an object, its entries are mapped to struct fields, obviously;
|
||||
/// - if not, the json is one of bool, number, string or array, make it a special field called
|
||||
/// "__plain" in a struct with only that field.
|
||||
/// [JSON_PLAIN_FIELD_NAME] with metadata [JSON_PLAIN_FIELD_METADATA_KEY] = `"true"` in a
|
||||
/// struct with only that field.
|
||||
pub(crate) fn as_struct_type(&self) -> StructType {
|
||||
match &self.format {
|
||||
JsonFormat::Jsonb => StructType::default(),
|
||||
JsonFormat::Native(inner) => match inner.as_ref() {
|
||||
ConcreteDataType::Struct(t) => t.clone(),
|
||||
x => StructType::new(Arc::new(vec![StructField::new(
|
||||
"__plain".to_string(),
|
||||
x.clone(),
|
||||
true,
|
||||
)])),
|
||||
x => {
|
||||
let mut field =
|
||||
StructField::new(JSON_PLAIN_FIELD_NAME.to_string(), x.clone(), true);
|
||||
field.insert_metadata(JSON_PLAIN_FIELD_METADATA_KEY, true);
|
||||
StructType::new(Arc::new(vec![field]))
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(LFC): remove "allow unused"
|
||||
#[allow(unused)]
|
||||
/// Check if this json type is the special "plain" one.
|
||||
/// See [JsonType::as_struct_type].
|
||||
pub(crate) fn is_plain_json(&self) -> bool {
|
||||
let JsonFormat::Native(box ConcreteDataType::Struct(t)) = &self.format else {
|
||||
return true;
|
||||
};
|
||||
let fields = t.fields();
|
||||
let Some((single, [])) = fields.split_first() else {
|
||||
return false;
|
||||
};
|
||||
single.name() == JSON_PLAIN_FIELD_NAME
|
||||
&& single.metadata(JSON_PLAIN_FIELD_METADATA_KEY) == Some("true")
|
||||
}
|
||||
|
||||
/// Try to merge this json type with others, error on datatype conflict.
|
||||
pub(crate) fn merge(&mut self, other: &JsonType) -> Result<()> {
|
||||
match (&self.format, &other.format) {
|
||||
@@ -91,6 +112,47 @@ impl JsonType {
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_mergeable(&self, other: &JsonType) -> bool {
|
||||
match (&self.format, &other.format) {
|
||||
(JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
|
||||
(JsonFormat::Native(this), JsonFormat::Native(that)) => {
|
||||
is_mergeable(this.as_ref(), that.as_ref())
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_mergeable(this: &ConcreteDataType, that: &ConcreteDataType) -> bool {
|
||||
fn is_mergeable_struct(this: &StructType, that: &StructType) -> bool {
|
||||
let this_fields = this.fields();
|
||||
let this_fields = this_fields
|
||||
.iter()
|
||||
.map(|x| (x.name(), x))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
for that_field in that.fields().iter() {
|
||||
if let Some(this_field) = this_fields.get(that_field.name())
|
||||
&& !is_mergeable(this_field.data_type(), that_field.data_type())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
match (this, that) {
|
||||
(this, that) if this == that => true,
|
||||
(ConcreteDataType::List(this), ConcreteDataType::List(that)) => {
|
||||
is_mergeable(this.item_type(), that.item_type())
|
||||
}
|
||||
(ConcreteDataType::Struct(this), ConcreteDataType::Struct(that)) => {
|
||||
is_mergeable_struct(this, that)
|
||||
}
|
||||
(ConcreteDataType::Null(_), _) | (_, ConcreteDataType::Null(_)) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn merge(this: &ConcreteDataType, that: &ConcreteDataType) -> Result<ConcreteDataType> {
|
||||
@@ -166,7 +228,10 @@ impl DataType for JsonType {
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(BinaryVectorBuilder::with_capacity(capacity))
|
||||
match self.format {
|
||||
JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)),
|
||||
JsonFormat::Native(_) => Box::new(JsonVectorBuilder::with_capacity(capacity)),
|
||||
}
|
||||
}
|
||||
|
||||
fn try_cast(&self, from: Value) -> Option<Value> {
|
||||
@@ -226,10 +291,12 @@ mod tests {
|
||||
let result = json_type.merge(other);
|
||||
match (result, expected) {
|
||||
(Ok(()), Ok(expected)) => {
|
||||
assert_eq!(json_type.name(), expected)
|
||||
assert_eq!(json_type.name(), expected);
|
||||
assert!(json_type.is_mergeable(other));
|
||||
}
|
||||
(Err(err), Err(expected)) => {
|
||||
assert_eq!(err.to_string(), expected)
|
||||
assert_eq!(err.to_string(), expected);
|
||||
assert!(!json_type.is_mergeable(other));
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::{DataType as ArrowDataType, Field};
|
||||
@@ -46,6 +47,15 @@ impl TryFrom<&Fields> for StructType {
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> From<[StructField; N]> for StructType {
|
||||
fn from(value: [StructField; N]) -> Self {
|
||||
let value: Box<[StructField]> = Box::new(value);
|
||||
Self {
|
||||
fields: Arc::new(value.into_vec()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for StructType {
|
||||
fn name(&self) -> String {
|
||||
format!(
|
||||
@@ -108,6 +118,7 @@ pub struct StructField {
|
||||
name: String,
|
||||
data_type: ConcreteDataType,
|
||||
nullable: bool,
|
||||
metadata: BTreeMap<String, String>,
|
||||
}
|
||||
|
||||
impl StructField {
|
||||
@@ -116,6 +127,7 @@ impl StructField {
|
||||
name,
|
||||
data_type,
|
||||
nullable,
|
||||
metadata: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -135,11 +147,25 @@ impl StructField {
|
||||
self.nullable
|
||||
}
|
||||
|
||||
pub(crate) fn insert_metadata(&mut self, key: impl ToString, value: impl ToString) {
|
||||
self.metadata.insert(key.to_string(), value.to_string());
|
||||
}
|
||||
|
||||
pub(crate) fn metadata(&self, key: &str) -> Option<&str> {
|
||||
self.metadata.get(key).map(String::as_str)
|
||||
}
|
||||
|
||||
pub fn to_df_field(&self) -> Field {
|
||||
let metadata = self
|
||||
.metadata
|
||||
.iter()
|
||||
.map(|(k, v)| (k.clone(), v.clone()))
|
||||
.collect();
|
||||
Field::new(
|
||||
self.name.clone(),
|
||||
self.data_type.as_arrow_type(),
|
||||
self.nullable,
|
||||
)
|
||||
.with_metadata(metadata)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -873,6 +873,12 @@ impl From<&[u8]> for Value {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<()> for Value {
|
||||
fn from(_: ()) -> Self {
|
||||
Value::Null
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Value> for serde_json::Value {
|
||||
type Error = serde_json::Error;
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ mod duration;
|
||||
mod eq;
|
||||
mod helper;
|
||||
mod interval;
|
||||
pub(crate) mod json;
|
||||
mod list;
|
||||
mod null;
|
||||
pub(crate) mod operations;
|
||||
|
||||
@@ -464,6 +464,14 @@ impl Helper {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn pretty_print(vector: VectorRef) -> String {
|
||||
let array = vector.to_arrow_array();
|
||||
arrow::util::pretty::pretty_format_columns(&vector.vector_type_name(), &[array])
|
||||
.map(|x| x.to_string())
|
||||
.unwrap_or_else(|e| e.to_string())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::array::{
|
||||
|
||||
15
src/datatypes/src/vectors/json.rs
Normal file
15
src/datatypes/src/vectors/json.rs
Normal file
@@ -0,0 +1,15 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub(crate) mod builder;
|
||||
485
src/datatypes/src/vectors/json/builder.rs
Normal file
485
src/datatypes/src/vectors/json/builder.rs
Normal file
@@ -0,0 +1,485 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{Result, TryFromValueSnafu, UnsupportedOperationSnafu};
|
||||
use crate::prelude::{ValueRef, Vector, VectorRef};
|
||||
use crate::types::JsonType;
|
||||
use crate::value::StructValueRef;
|
||||
use crate::vectors::{MutableVector, StructVectorBuilder};
|
||||
|
||||
struct JsonStructsBuilder {
    json_type: JsonType,
    inner: StructVectorBuilder,
}

impl JsonStructsBuilder {
    fn new(json_type: JsonType, capacity: usize) -> Self {
        let struct_type = json_type.as_struct_type();
        let inner = StructVectorBuilder::with_type_and_capacity(struct_type, capacity);
        Self { json_type, inner }
    }

    fn len(&self) -> usize {
        self.inner.len()
    }

    fn push(&mut self, value: &ValueRef) -> Result<()> {
        if self.json_type.is_plain_json() {
            let value = ValueRef::Struct(StructValueRef::RefList {
                val: vec![value.clone()],
                fields: self.json_type.as_struct_type(),
            });
            self.inner.try_push_value_ref(&value)
        } else {
            self.inner.try_push_value_ref(value)
        }
    }

    /// Tries to merge (and consume the data of) the other json vector builder into this one.
    /// Note that the other builder's json type must be mergeable with this one's
    /// (this one's json type has all the fields of the other's, and no datatypes conflict).
    /// Normally this is guaranteed, as long as json values are pushed through [JsonVectorBuilder].
    fn try_merge(&mut self, other: &mut JsonStructsBuilder) -> Result<()> {
        debug_assert!(self.json_type.is_mergeable(&other.json_type));

        fn helper(this: &mut StructVectorBuilder, that: &mut StructVectorBuilder) -> Result<()> {
            let that_len = that.len();
            if let Some(x) = that.mut_null_buffer().finish() {
                this.mut_null_buffer().append_buffer(&x)
            } else {
                this.mut_null_buffer().append_n_non_nulls(that_len);
            }

            let that_fields = that.struct_type().fields();
            let mut that_builders = that_fields
                .iter()
                .zip(that.mut_value_builders().iter_mut())
                .map(|(field, builder)| (field.name(), builder))
                .collect::<HashMap<_, _>>();

            for (field, this_builder) in this
                .struct_type()
                .fields()
                .iter()
                .zip(this.mut_value_builders().iter_mut())
            {
                if let Some(that_builder) = that_builders.get_mut(field.name()) {
                    if field.data_type().is_struct() {
                        let this = this_builder
                            .as_mut_any()
                            .downcast_mut::<StructVectorBuilder>()
                            // Safety: a struct datatype field must correspond to a struct vector builder.
                            .unwrap();

                        let that = that_builder
                            .as_mut_any()
                            .downcast_mut::<StructVectorBuilder>()
                            // Safety: the other builder with the same field name must have the same datatype,
                            // ensured because the two json types are mergeable.
                            .unwrap();
                        helper(this, that)?;
                    } else {
                        let vector = that_builder.to_vector();
                        this_builder.extend_slice_of(vector.as_ref(), 0, vector.len())?;
                    }
                } else {
                    this_builder.push_nulls(that_len);
                }
            }
            Ok(())
        }
        helper(&mut self.inner, &mut other.inner)
    }

    /// Same as [JsonStructsBuilder::try_merge], but does not consume the other builder's data.
    fn try_merge_cloned(&mut self, other: &JsonStructsBuilder) -> Result<()> {
        debug_assert!(self.json_type.is_mergeable(&other.json_type));

        fn helper(this: &mut StructVectorBuilder, that: &StructVectorBuilder) -> Result<()> {
            let that_len = that.len();
            if let Some(x) = that.null_buffer().finish_cloned() {
                this.mut_null_buffer().append_buffer(&x)
            } else {
                this.mut_null_buffer().append_n_non_nulls(that_len);
            }

            let that_fields = that.struct_type().fields();
            let that_builders = that_fields
                .iter()
                .zip(that.value_builders().iter())
                .map(|(field, builder)| (field.name(), builder))
                .collect::<HashMap<_, _>>();

            for (field, this_builder) in this
                .struct_type()
                .fields()
                .iter()
                .zip(this.mut_value_builders().iter_mut())
            {
                if let Some(that_builder) = that_builders.get(field.name()) {
                    if field.data_type().is_struct() {
                        let this = this_builder
                            .as_mut_any()
                            .downcast_mut::<StructVectorBuilder>()
                            // Safety: a struct datatype field must correspond to a struct vector builder.
                            .unwrap();

                        let that = that_builder
                            .as_any()
                            .downcast_ref::<StructVectorBuilder>()
                            // Safety: the other builder with the same field name must have the same datatype,
                            // ensured because the two json types are mergeable.
                            .unwrap();
                        helper(this, that)?;
                    } else {
                        let vector = that_builder.to_vector_cloned();
                        this_builder.extend_slice_of(vector.as_ref(), 0, vector.len())?;
                    }
                } else {
                    this_builder.push_nulls(that_len);
                }
            }
            Ok(())
        }
        helper(&mut self.inner, &other.inner)
    }
}

/// The vector builder for json type values.
///
/// Json types are dynamic, to some degree (as long as they can be merged into each other). So are
/// json values. Json values are physically stored in struct vectors, which require the types of
/// struct values to be fixed inside a certain struct vector. To resolve this "dynamic" vs "fixed"
/// datatype problem, in this builder, each type of json value gets its own struct vector builder.
/// Once a value of a new json type is pushed into this builder, a new "child" builder is created for it.
///
/// Given the "mixed" nature of the values stored in this builder, to produce the json vector, a
/// "merge" operation is performed. The "merge" iterates over all the "child" builders, and fills
/// nulls for missing json fields. The final vector's json type is fixed to be the "merge" of all
/// pushed json types.
pub(crate) struct JsonVectorBuilder {
    merged_type: JsonType,
    capacity: usize,
    builders: Vec<JsonStructsBuilder>,
}

impl JsonVectorBuilder {
    pub(crate) fn with_capacity(capacity: usize) -> Self {
        Self {
            merged_type: JsonType::empty(),
            capacity,
            builders: vec![],
        }
    }

    fn try_create_new_builder(&mut self, json_type: &JsonType) -> Result<&mut JsonStructsBuilder> {
        self.merged_type.merge(json_type)?;

        let builder = JsonStructsBuilder::new(json_type.clone(), self.capacity);
        self.builders.push(builder);

        let len = self.builders.len();
        Ok(&mut self.builders[len - 1])
    }
}

impl MutableVector for JsonVectorBuilder {
    fn data_type(&self) -> ConcreteDataType {
        ConcreteDataType::Json(self.merged_type.clone())
    }

    fn len(&self) -> usize {
        self.builders.iter().map(|x| x.len()).sum()
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    fn as_mut_any(&mut self) -> &mut dyn Any {
        self
    }

    fn to_vector(&mut self) -> VectorRef {
        // Fast path:
        if self.builders.len() == 1 {
            return self.builders[0].inner.to_vector();
        }

        let mut unified_jsons = JsonStructsBuilder::new(self.merged_type.clone(), self.capacity);
        for builder in self.builders.iter_mut() {
            unified_jsons
                .try_merge(builder)
                // Safety: the "unified_jsons" has the merged json type from all the builders,
                // so it should merge them without errors.
                .unwrap_or_else(|e| panic!("failed to merge json builders, error: {e}"));
        }
        unified_jsons.inner.to_vector()
    }

    fn to_vector_cloned(&self) -> VectorRef {
        // Fast path:
        if self.builders.len() == 1 {
            return self.builders[0].inner.to_vector_cloned();
        }

        let mut unified_jsons = JsonStructsBuilder::new(self.merged_type.clone(), self.capacity);
        for builder in self.builders.iter() {
            unified_jsons
                .try_merge_cloned(builder)
                // Safety: the "unified_jsons" has the merged json type from all the builders,
                // so it should merge them without errors.
                .unwrap_or_else(|e| panic!("failed to merge json builders, error: {e}"));
        }
        unified_jsons.inner.to_vector_cloned()
    }

    fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
        let data_type = value.data_type();
        let json_type = data_type.as_json().with_context(|| TryFromValueSnafu {
            reason: format!("expected json value, got {value:?}"),
        })?;

        let builder = match self.builders.last_mut() {
            Some(last) => {
                if &last.json_type != json_type {
                    self.try_create_new_builder(json_type)?
                } else {
                    last
                }
            }
            None => self.try_create_new_builder(json_type)?,
        };

        let ValueRef::Json(value) = value else {
            // Safety: a value of json datatype must be a json value.
            unreachable!()
        };
        builder.push(value)
    }

    fn push_null(&mut self) {
        let null_json_value = ValueRef::Json(Box::new(ValueRef::Null));
        self.try_push_value_ref(&null_json_value)
            // Safety: following the logic of "try_push_value_ref", a null json value should
            // always be pushable into any json vector.
            .unwrap_or_else(|e| {
                panic!("failed to push null json value: {null_json_value:?}, error: {e}")
            });
    }

    fn extend_slice_of(&mut self, _: &dyn Vector, _: usize, _: usize) -> Result<()> {
        UnsupportedOperationSnafu {
            op: "extend_slice_of",
            vector_type: "JsonVector",
        }
        .fail()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::data_type::DataType;
    use crate::json::JsonStructureSettings;
    use crate::vectors::helper::pretty_print;

    fn push(json: &str, builder: &mut JsonVectorBuilder, expected: std::result::Result<(), &str>) {
        let settings = JsonStructureSettings::Structured(None);
        let json: serde_json::Value = serde_json::from_str(json).unwrap();
        let value = settings.encode(json).unwrap();

        let value = value.as_value_ref();
        let result = builder.try_push_value_ref(&value);
        match (result, expected) {
            (Ok(()), Ok(())) => (),
            (Err(e), Err(expected)) => assert_eq!(e.to_string(), expected),
            _ => unreachable!(),
        }
    }

    #[test]
    fn test_push_plain_jsons() -> Result<()> {
        let jsons = vec!["1", "2", r#""s""#, "[true]"];
        let results = vec![
            Ok(()),
            Ok(()),
            Err(
                "Failed to merge JSON datatype: datatypes have conflict, this: Int64, that: String",
            ),
            Err(
                "Failed to merge JSON datatype: datatypes have conflict, this: Int64, that: List<Boolean>",
            ),
        ];
        let mut builder = JsonVectorBuilder::with_capacity(1);
        for (json, result) in jsons.into_iter().zip(results.into_iter()) {
            push(json, &mut builder, result);
        }
        let vector = builder.to_vector();
        let expected = r#"
+----------------+
| StructVector |
+----------------+
| {__plain__: 1} |
| {__plain__: 2} |
+----------------+"#;
        assert_eq!(pretty_print(vector), expected.trim());
        Ok(())
    }

    #[test]
    fn test_push_json_objects() -> Result<()> {
        let jsons = vec![
            r#"{
                "s": "a",
                "list": [1, 2, 3]
            }"#,
            r#"{
                "list": [4],
                "s": "b"
            }"#,
            r#"{
                "s": "c",
                "float": 0.9
            }"#,
            r#"{
                "float": 0.8,
                "s": "d"
            }"#,
            r#"{
                "float": 0.7,
                "int": -1
            }"#,
            r#"{
                "int": 0,
                "float": 0.6
            }"#,
            r#"{
                "int": 1,
                "object": {"hello": "world", "timestamp": 1761523200000}
            }"#,
            r#"{
                "object": {"hello": "greptime", "timestamp": 1761523201000},
                "int": 2
            }"#,
            r#"{
                "object": {"timestamp": 1761523202000},
                "nested": {"a": {"b": {"b": {"a": "abba"}}}}
            }"#,
            r#"{
                "nested": {"a": {"b": {"a": {"b": "abab"}}}},
                "object": {"timestamp": 1761523203000}
            }"#,
        ];
        let mut builder = JsonVectorBuilder::with_capacity(1);
        for json in jsons {
            push(json, &mut builder, Ok(()));
        }
        assert_eq!(builder.len(), 10);

        // test children builders:
        assert_eq!(builder.builders.len(), 6);
        let expect_types = [
            r#"Json<Struct<"list": List<Int64>, "s": String>>"#,
            r#"Json<Struct<"float": Float64, "s": String>>"#,
            r#"Json<Struct<"float": Float64, "int": Int64>>"#,
            r#"Json<Struct<"int": Int64, "object": Struct<"hello": String, "timestamp": Int64>>>"#,
            r#"Json<Struct<"nested": Struct<"a": Struct<"b": Struct<"b": Struct<"a": String>>>>, "object": Struct<"timestamp": Int64>>>"#,
            r#"Json<Struct<"nested": Struct<"a": Struct<"b": Struct<"a": Struct<"b": String>>>>, "object": Struct<"timestamp": Int64>>>"#,
        ];
        let expect_vectors = [
            r#"
+-------------------------+
| StructVector |
+-------------------------+
| {list: [1, 2, 3], s: a} |
| {list: [4], s: b} |
+-------------------------+"#,
            r#"
+--------------------+
| StructVector |
+--------------------+
| {float: 0.9, s: c} |
| {float: 0.8, s: d} |
+--------------------+"#,
            r#"
+-----------------------+
| StructVector |
+-----------------------+
| {float: 0.7, int: -1} |
| {float: 0.6, int: 0} |
+-----------------------+"#,
            r#"
+---------------------------------------------------------------+
| StructVector |
+---------------------------------------------------------------+
| {int: 1, object: {hello: world, timestamp: 1761523200000}} |
| {int: 2, object: {hello: greptime, timestamp: 1761523201000}} |
+---------------------------------------------------------------+"#,
            r#"
+------------------------------------------------------------------------+
| StructVector |
+------------------------------------------------------------------------+
| {nested: {a: {b: {b: {a: abba}}}}, object: {timestamp: 1761523202000}} |
+------------------------------------------------------------------------+"#,
            r#"
+------------------------------------------------------------------------+
| StructVector |
+------------------------------------------------------------------------+
| {nested: {a: {b: {a: {b: abab}}}}, object: {timestamp: 1761523203000}} |
+------------------------------------------------------------------------+"#,
        ];
        for (builder, (expect_type, expect_vector)) in builder
            .builders
            .iter()
            .zip(expect_types.into_iter().zip(expect_vectors.into_iter()))
        {
            assert_eq!(builder.json_type.name(), expect_type);
            let vector = builder.inner.to_vector_cloned();
            assert_eq!(pretty_print(vector), expect_vector.trim());
        }

        // test final merged json type:
        let expected = r#"Json<Struct<"float": Float64, "int": Int64, "list": List<Int64>, "nested": Struct<"a": Struct<"b": Struct<"a": Struct<"b": String>, "b": Struct<"a": String>>>>, "object": Struct<"hello": String, "timestamp": Int64>, "s": String>>"#;
        assert_eq!(builder.data_type().to_string(), expected);

        // test final produced vector:
        let expected = r#"
+-------------------------------------------------------------------------------------------------------------------+
| StructVector |
+-------------------------------------------------------------------------------------------------------------------+
| {float: , int: , list: [1, 2, 3], nested: , object: , s: a} |
| {float: , int: , list: [4], nested: , object: , s: b} |
| {float: 0.9, int: , list: , nested: , object: , s: c} |
| {float: 0.8, int: , list: , nested: , object: , s: d} |
| {float: 0.7, int: -1, list: , nested: , object: , s: } |
| {float: 0.6, int: 0, list: , nested: , object: , s: } |
| {float: , int: 1, list: , nested: , object: {hello: world, timestamp: 1761523200000}, s: } |
| {float: , int: 2, list: , nested: , object: {hello: greptime, timestamp: 1761523201000}, s: } |
| {float: , int: , list: , nested: {a: {b: {a: , b: {a: abba}}}}, object: {hello: , timestamp: 1761523202000}, s: } |
| {float: , int: , list: , nested: {a: {b: {a: {b: abab}, b: }}}, object: {hello: , timestamp: 1761523203000}, s: } |
+-------------------------------------------------------------------------------------------------------------------+"#;
        let vector = builder.to_vector_cloned();
        assert_eq!(pretty_print(vector), expected.trim());
        let vector = builder.to_vector();
        assert_eq!(pretty_print(vector), expected.trim());
        Ok(())
    }
}

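// Editor's note: a minimal, standalone sketch of the "merge by filling nulls" idea that the
// JsonVectorBuilder doc comment above describes. This is NOT the crate's API; it only uses
// serde_json to show how rows of different shapes end up under one merged schema, with
// absent fields turned into nulls -- the same thing the final StructVector does.
use std::collections::BTreeSet;

use serde_json::{Map, Value, json};

/// Collect the union of all top-level keys seen across the rows (the "merged type").
fn merged_keys(rows: &[Value]) -> BTreeSet<String> {
    rows.iter()
        .filter_map(Value::as_object)
        .flat_map(|obj| obj.keys().cloned())
        .collect()
}

/// Rewrite every row so it carries every merged key, filling absent fields with null.
fn unify(rows: &[Value]) -> Vec<Value> {
    let keys = merged_keys(rows);
    rows.iter()
        .map(|row| {
            let obj = row.as_object().cloned().unwrap_or_default();
            let mut out = Map::new();
            for key in &keys {
                out.insert(key.clone(), obj.get(key).cloned().unwrap_or(Value::Null));
            }
            Value::Object(out)
        })
        .collect()
}

fn main() {
    let rows = vec![json!({"s": "a", "list": [1, 2, 3]}), json!({"s": "c", "float": 0.9})];
    for row in unify(&rows) {
        // Every row now has the keys {float, list, s}; the missing ones are null.
        println!("{row}");
    }
}
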
@@ -323,6 +323,26 @@ impl StructVectorBuilder {
        }
        self.null_buffer.append_null();
    }

    pub(crate) fn struct_type(&self) -> &StructType {
        &self.fields
    }

    pub(crate) fn value_builders(&self) -> &[Box<dyn MutableVector>] {
        &self.value_builders
    }

    pub(crate) fn mut_value_builders(&mut self) -> &mut [Box<dyn MutableVector>] {
        &mut self.value_builders
    }

    pub(crate) fn null_buffer(&self) -> &NullBufferBuilder {
        &self.null_buffer
    }

    pub(crate) fn mut_null_buffer(&mut self) -> &mut NullBufferBuilder {
        &mut self.null_buffer
    }
}

impl MutableVector for StructVectorBuilder {

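// Editor's note: a hedged, crate-internal sketch of what the new accessors make possible for
// sibling code such as the JSON builder above. Only methods that appear in the diffs on this
// page are used; anything beyond that would be an assumption.
fn debug_column_lengths(builder: &StructVectorBuilder) {
    for (field, child) in builder
        .struct_type()
        .fields()
        .iter()
        .zip(builder.value_builders().iter())
    {
        // Each child builder should hold exactly as many values as the parent has rows.
        println!("field {} has {} values buffered", field.name(), child.len());
    }
}
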
@@ -21,6 +21,7 @@ use std::sync::Arc;
use std::time::{Duration, Instant, SystemTime};

use api::v1::{RowDeleteRequest, RowDeleteRequests, RowInsertRequest, RowInsertRequests};
use common_base::memory_limit::MemoryLimit;
use common_config::Configurable;
use common_error::ext::BoxedError;
use common_meta::key::TableMetadataManagerRef;
@@ -132,6 +133,7 @@ impl Default for FlownodeOptions {
            query: QueryOptions {
                parallelism: 1,
                allow_query_fallback: false,
                memory_pool_size: MemoryLimit::default(),
            },
            user_provider: None,
            memory: MemoryOptions::default(),

@@ -18,6 +18,7 @@ use std::collections::BTreeSet;
use std::sync::Arc;

use catalog::CatalogManagerRef;
use client::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::ext::BoxedError;
use common_meta::key::flow::FlowMetadataManagerRef;
use common_recordbatch::{RecordBatch, RecordBatches, SendableRecordBatchStream};
@@ -396,8 +397,8 @@ impl RefillTask {
        // we don't need information from query context in this query so a default query context is enough
        let query_ctx = Arc::new(
            QueryContextBuilder::default()
                .current_catalog("greptime".to_string())
                .current_schema("public".to_string())
                .current_catalog(DEFAULT_CATALOG_NAME.to_string())
                .current_schema(DEFAULT_SCHEMA_NAME.to_string())
                .build(),
        );

@@ -23,7 +23,7 @@ use api::v1::query_request::Query;
use api::v1::{CreateTableExpr, QueryRequest};
use client::{Client, Database};
use common_error::ext::{BoxedError, ErrorExt};
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_grpc::channel_manager::{ChannelConfig, ChannelManager, load_tls_config};
use common_meta::cluster::{NodeInfo, NodeInfoKey, Role};
use common_meta::peer::Peer;
use common_meta::rpc::store::RangeRequest;
@@ -123,12 +123,10 @@ impl FrontendClient {
                let cfg = ChannelConfig::new()
                    .connect_timeout(batch_opts.grpc_conn_timeout)
                    .timeout(batch_opts.query_timeout);
                if let Some(tls) = &batch_opts.frontend_tls {
                    let cfg = cfg.client_tls_config(tls.clone());
                    ChannelManager::with_tls_config(cfg).context(InvalidClientConfigSnafu)?
                } else {
                    ChannelManager::with_config(cfg)
                }

                let tls_config = load_tls_config(batch_opts.frontend_tls.as_ref())
                    .context(InvalidClientConfigSnafu)?;
                ChannelManager::with_config(cfg, tls_config)
            },
            auth,
            query,

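// Editor's note: a hedged sketch of the new construction path shown in the hunk above. TLS
// handling moves out of the caller's branch and into `load_tls_config`, so the channel manager
// is built the same way with or without TLS. The option and error types here are taken from
// the diff or assumed for illustration; this is not the crate's exact code.
fn build_channel_manager(
    cfg: ChannelConfig,
    frontend_tls: Option<&ClientTlsOption>, // assumed type name
) -> Result<ChannelManager> {
    // `load_tls_config` yields no TLS config when the option is absent, so the single
    // `with_config(cfg, tls)` call covers both the plain and the TLS case.
    let tls = load_tls_config(frontend_tls).context(InvalidClientConfigSnafu)?;
    Ok(ChannelManager::with_config(cfg, tls))
}
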
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;

use async_trait::async_trait;
@@ -28,6 +28,7 @@ use common_function::scalars::udf::create_udf;
use common_query::{Output, OutputData};
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::util;
use common_telemetry::warn;
use datafusion::dataframe::DataFrame;
use datafusion::execution::SessionStateBuilder;
use datafusion::execution::context::SessionContext;
@@ -42,8 +43,9 @@ use servers::error::{
};
use servers::http::jaeger::{JAEGER_QUERY_TABLE_NAME_KEY, QueryTraceParams};
use servers::otlp::trace::{
    DURATION_NANO_COLUMN, SERVICE_NAME_COLUMN, SPAN_ATTRIBUTES_COLUMN, SPAN_KIND_COLUMN,
    SPAN_KIND_PREFIX, SPAN_NAME_COLUMN, TIMESTAMP_COLUMN, TRACE_ID_COLUMN,
    DURATION_NANO_COLUMN, KEY_OTEL_STATUS_ERROR_KEY, SERVICE_NAME_COLUMN, SPAN_ATTRIBUTES_COLUMN,
    SPAN_KIND_COLUMN, SPAN_KIND_PREFIX, SPAN_NAME_COLUMN, SPAN_STATUS_CODE, SPAN_STATUS_ERROR,
    TIMESTAMP_COLUMN, TRACE_ID_COLUMN,
};
use servers::query_handler::JaegerQueryHandler;
use session::context::QueryContextRef;
@@ -126,6 +128,7 @@ impl JaegerQueryHandler for Instance {
        trace_id: &str,
        start_time: Option<i64>,
        end_time: Option<i64>,
        limit: Option<usize>,
    ) -> ServerResult<Output> {
        // It's equivalent to the following SQL query:
        //
@@ -153,6 +156,13 @@ impl JaegerQueryHandler for Instance {
            filters.push(col(TIMESTAMP_COLUMN).lt_eq(lit_timestamp_nano(end_time)));
        }

        let limit = if start_time.is_some() && end_time.is_some() {
            // allow unlimited limit if time range is specified
            limit
        } else {
            limit.or(Some(DEFAULT_LIMIT))
        };

        Ok(query_trace_table(
            ctx,
            self.catalog_manager(),
@@ -160,7 +170,7 @@ impl JaegerQueryHandler for Instance {
            selects,
            filters,
            vec![col(TIMESTAMP_COLUMN).sort(false, false)], // Sort by timestamp in descending order.
            Some(DEFAULT_LIMIT),
            limit,
            None,
            vec![],
        )
@@ -263,7 +273,7 @@ impl JaegerQueryHandler for Instance {
            self.query_engine(),
            vec![wildcard()],
            filters,
            vec![],
            vec![col(TIMESTAMP_COLUMN).sort(false, false)], // Sort by timestamp in descending order.
            None,
            None,
            vec![],
@@ -322,6 +332,7 @@ async fn query_trace_table(
        })?;

    let is_data_model_v1 = table
        .clone()
        .table_info()
        .meta
        .options
@@ -330,6 +341,14 @@ async fn query_trace_table(
        .map(|s| s.as_str())
        == Some(TABLE_DATA_MODEL_TRACE_V1);

    // collect to set
    let col_names = table
        .table_info()
        .meta
        .field_column_names()
        .map(|s| format!("\"{}\"", s))
        .collect::<HashSet<String>>();

    let df_context = create_df_context(query_engine)?;

    let dataframe = df_context
@@ -342,7 +361,7 @@ async fn query_trace_table(
    let dataframe = filters
        .into_iter()
        .chain(tags.map_or(Ok(vec![]), |t| {
            tags_filters(&dataframe, t, is_data_model_v1)
            tags_filters(&dataframe, t, is_data_model_v1, &col_names)
        })?)
        .try_fold(dataframe, |df, expr| {
            df.filter(expr).context(DataFusionSnafu)

@@ -472,23 +491,73 @@ fn json_tag_filters(
    Ok(filters)
}

fn flatten_tag_filters(tags: HashMap<String, JsonValue>) -> ServerResult<Vec<Expr>> {
/// Helper function to check if span_key or resource_key exists in col_names and create an expression.
/// If neither exists, logs a warning and returns None.
#[inline]
fn check_col_and_build_expr<F>(
    span_key: String,
    resource_key: String,
    key: &str,
    col_names: &HashSet<String>,
    expr_builder: F,
) -> Option<Expr>
where
    F: FnOnce(String) -> Expr,
{
    if col_names.contains(&span_key) {
        return Some(expr_builder(span_key));
    }
    if col_names.contains(&resource_key) {
        return Some(expr_builder(resource_key));
    }
    warn!("tag key {} not found in table columns", key);
    None
}

fn flatten_tag_filters(
    tags: HashMap<String, JsonValue>,
    col_names: &HashSet<String>,
) -> ServerResult<Vec<Expr>> {
    let filters = tags
        .into_iter()
        .filter_map(|(key, value)| {
            let key = format!("\"span_attributes.{}\"", key);
            if key == KEY_OTEL_STATUS_ERROR_KEY && value == JsonValue::Bool(true) {
                return Some(col(SPAN_STATUS_CODE).eq(lit(SPAN_STATUS_ERROR)));
            }

            // TODO(shuiyisong): add more precise mapping from key to col name
            let span_key = format!("\"span_attributes.{}\"", key);
            let resource_key = format!("\"resource_attributes.{}\"", key);
            match value {
                JsonValue::String(value) => Some(col(key).eq(lit(value))),
                JsonValue::String(value) => {
                    check_col_and_build_expr(span_key, resource_key, &key, col_names, |k| {
                        col(k).eq(lit(value))
                    })
                }
                JsonValue::Number(value) => {
                    if value.is_f64() {
                        // safe to unwrap as checked previously
                        Some(col(key).eq(lit(value.as_f64().unwrap())))
                        let value = value.as_f64().unwrap();
                        check_col_and_build_expr(span_key, resource_key, &key, col_names, |k| {
                            col(k).eq(lit(value))
                        })
                    } else {
                        Some(col(key).eq(lit(value.as_i64().unwrap())))
                        let value = value.as_i64().unwrap();
                        check_col_and_build_expr(span_key, resource_key, &key, col_names, |k| {
                            col(k).eq(lit(value))
                        })
                    }
                }
                JsonValue::Bool(value) => Some(col(key).eq(lit(value))),
                JsonValue::Null => Some(col(key).is_null()),
                JsonValue::Bool(value) => {
                    check_col_and_build_expr(span_key, resource_key, &key, col_names, |k| {
                        col(k).eq(lit(value))
                    })
                }
                JsonValue::Null => {
                    check_col_and_build_expr(span_key, resource_key, &key, col_names, |k| {
                        col(k).is_null()
                    })
                }
                // not supported at the moment
                JsonValue::Array(_value) => None,
                JsonValue::Object(_value) => None,
@@ -502,9 +571,10 @@ fn tags_filters(
    dataframe: &DataFrame,
    tags: HashMap<String, JsonValue>,
    is_data_model_v1: bool,
    col_names: &HashSet<String>,
) -> ServerResult<Vec<Expr>> {
    if is_data_model_v1 {
        flatten_tag_filters(tags)
        flatten_tag_filters(tags, col_names)
    } else {
        json_tag_filters(dataframe, tags)
    }

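// Editor's note: a hedged usage sketch for `check_col_and_build_expr` from the hunk above.
// Given the table's quoted column names, a Jaeger tag such as `http.method=GET` is mapped to
// whichever of the span-attribute or resource-attribute columns actually exists; the column
// and tag names here are illustrative, not taken from any real trace table.
use std::collections::HashSet;

use datafusion::prelude::{Expr, col, lit};

fn example_filter(col_names: &HashSet<String>) -> Option<Expr> {
    let key = "http.method";
    let span_key = format!("\"span_attributes.{key}\"");
    let resource_key = format!("\"resource_attributes.{key}\"");
    // Falls back to the resource column if the span column is absent; returns None (and the
    // real helper logs a warning) when neither column exists.
    check_col_and_build_expr(span_key, resource_key, key, col_names, |k| {
        col(k).eq(lit("GET"))
    })
}
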
@@ -36,7 +36,7 @@ async fn run() {
        .timeout(Duration::from_secs(3))
        .connect_timeout(Duration::from_secs(5))
        .tcp_nodelay(true);
    let channel_manager = ChannelManager::with_config(config);
    let channel_manager = ChannelManager::with_config(config, None);
    let mut meta_client = MetaClientBuilder::datanode_default_options(id)
        .channel_manager(channel_manager)
        .build();

@@ -101,7 +101,7 @@ pub async fn create_meta_client(

    if let MetaClientType::Frontend = client_type {
        let ddl_config = base_config.clone().timeout(meta_client_options.ddl_timeout);
        builder = builder.ddl_channel_manager(ChannelManager::with_config(ddl_config));
        builder = builder.ddl_channel_manager(ChannelManager::with_config(ddl_config, None));
        if let Some(plugins) = plugins {
            let region_follower = plugins.get::<RegionFollowerClientRef>();
            if let Some(region_follower) = region_follower {
@@ -112,8 +112,8 @@ pub async fn create_meta_client(
    }

    builder = builder
        .channel_manager(ChannelManager::with_config(base_config))
        .heartbeat_channel_manager(ChannelManager::with_config(heartbeat_config));
        .channel_manager(ChannelManager::with_config(base_config, None))
        .heartbeat_channel_manager(ChannelManager::with_config(heartbeat_config, None));

    let mut meta_client = builder.build();

@@ -72,7 +72,10 @@ serde.workspace = true
serde_json.workspace = true
servers.workspace = true
snafu.workspace = true
sqlx = { workspace = true, optional = true }
sqlx = { workspace = true, features = [
    "mysql",
    "chrono",
], optional = true }
store-api.workspace = true
strum.workspace = true
table.workspace = true

@@ -26,6 +26,7 @@ use common_meta::distributed_time_constants::{
use common_meta::error::Result;
use common_meta::peer::{Peer, PeerDiscovery, PeerResolver};
use common_meta::{DatanodeId, FlownodeId};
use common_time::util::DefaultSystemTimer;
use snafu::ResultExt;

use crate::cluster::MetaPeerClient;
@@ -35,6 +36,7 @@ use crate::discovery::lease::{LeaseValueAccessor, LeaseValueType};
impl PeerDiscovery for MetaPeerClient {
    async fn active_frontends(&self) -> Result<Vec<Peer>> {
        utils::alive_frontends(
            &DefaultSystemTimer,
            self,
            Duration::from_millis(FRONTEND_HEARTBEAT_INTERVAL_MILLIS),
        )
@@ -47,20 +49,30 @@ impl PeerDiscovery for MetaPeerClient {
        &self,
        filter: Option<for<'a> fn(&'a NodeWorkloads) -> bool>,
    ) -> Result<Vec<Peer>> {
        utils::alive_datanodes(self, Duration::from_secs(DATANODE_LEASE_SECS), filter)
            .await
            .map_err(BoxedError::new)
            .context(common_meta::error::ExternalSnafu)
        utils::alive_datanodes(
            &DefaultSystemTimer,
            self,
            Duration::from_secs(DATANODE_LEASE_SECS),
            filter,
        )
        .await
        .map_err(BoxedError::new)
        .context(common_meta::error::ExternalSnafu)
    }

    async fn active_flownodes(
        &self,
        filter: Option<for<'a> fn(&'a NodeWorkloads) -> bool>,
    ) -> Result<Vec<Peer>> {
        utils::alive_flownodes(self, Duration::from_secs(FLOWNODE_LEASE_SECS), filter)
            .await
            .map_err(BoxedError::new)
            .context(common_meta::error::ExternalSnafu)
        utils::alive_flownodes(
            &DefaultSystemTimer,
            self,
            Duration::from_secs(FLOWNODE_LEASE_SECS),
            filter,
        )
        .await
        .map_err(BoxedError::new)
        .context(common_meta::error::ExternalSnafu)
    }
}

|
||||
@@ -95,20 +95,22 @@ impl LeaseValueAccessor for MetaPeerClient {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicI64, Ordering};
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::meta::DatanodeWorkloads;
|
||||
use api::v1::meta::heartbeat_request::NodeWorkloads;
|
||||
use api::v1::meta::{DatanodeWorkloads, FlownodeWorkloads};
|
||||
use common_meta::cluster::{FrontendStatus, NodeInfo, NodeInfoKey, NodeStatus, Role};
|
||||
use common_meta::distributed_time_constants::FRONTEND_HEARTBEAT_INTERVAL_MILLIS;
|
||||
use common_meta::kv_backend::ResettableKvBackendRef;
|
||||
use common_meta::peer::{Peer, PeerDiscovery};
|
||||
use common_meta::rpc::store::PutRequest;
|
||||
use common_time::util::current_time_millis;
|
||||
use common_time::util::{DefaultSystemTimer, SystemTimer, current_time_millis};
|
||||
use common_workload::DatanodeWorkloadType;
|
||||
|
||||
use crate::discovery::utils::{self, accept_ingest_workload};
|
||||
use crate::key::{DatanodeLeaseKey, LeaseValue};
|
||||
use crate::key::{DatanodeLeaseKey, FlownodeLeaseKey, LeaseValue};
|
||||
use crate::test_util::create_meta_peer_client;
|
||||
|
||||
async fn put_lease_value(
|
||||
@@ -126,17 +128,47 @@ mod tests {
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
async fn put_flownode_lease_value(
|
||||
kv_backend: &ResettableKvBackendRef,
|
||||
key: FlownodeLeaseKey,
|
||||
value: LeaseValue,
|
||||
) {
|
||||
kv_backend
|
||||
.put(PutRequest {
|
||||
key: key.try_into().unwrap(),
|
||||
value: value.try_into().unwrap(),
|
||||
prev_kv: false,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
struct MockTimer {
|
||||
current: Arc<AtomicI64>,
|
||||
}
|
||||
|
||||
impl SystemTimer for MockTimer {
|
||||
fn current_time_millis(&self) -> i64 {
|
||||
self.current.fetch_add(1, Ordering::Relaxed)
|
||||
}
|
||||
|
||||
fn current_time_rfc3339(&self) -> String {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_alive_datanodes() {
|
||||
let client = create_meta_peer_client();
|
||||
let in_memory = client.memory_backend();
|
||||
let lease_secs = 10;
|
||||
let timer = DefaultSystemTimer;
|
||||
|
||||
// put a stale lease value for node 1
|
||||
let key = DatanodeLeaseKey { node_id: 1 };
|
||||
let value = LeaseValue {
|
||||
// 20s ago
|
||||
timestamp_millis: current_time_millis() - lease_secs * 2 * 1000,
|
||||
timestamp_millis: timer.current_time_millis() - lease_secs * 2 * 1000,
|
||||
node_addr: "127.0.0.1:20201".to_string(),
|
||||
workloads: NodeWorkloads::Datanode(DatanodeWorkloads {
|
||||
types: vec![DatanodeWorkloadType::Hybrid as i32],
|
||||
@@ -147,7 +179,7 @@ mod tests {
|
||||
// put a fresh lease value for node 2
|
||||
let key = DatanodeLeaseKey { node_id: 2 };
|
||||
let value = LeaseValue {
|
||||
timestamp_millis: current_time_millis(),
|
||||
timestamp_millis: timer.current_time_millis(),
|
||||
node_addr: "127.0.0.1:20202".to_string(),
|
||||
workloads: NodeWorkloads::Datanode(DatanodeWorkloads {
|
||||
types: vec![DatanodeWorkloadType::Hybrid as i32],
|
||||
@@ -155,6 +187,37 @@ mod tests {
|
||||
};
|
||||
put_lease_value(&in_memory, key.clone(), value.clone()).await;
|
||||
let peers = utils::alive_datanodes(
|
||||
&timer,
|
||||
client.as_ref(),
|
||||
Duration::from_secs(lease_secs as u64),
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(peers.len(), 1);
|
||||
assert_eq!(peers, vec![Peer::new(2, "127.0.0.1:20202".to_string())]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_alive_datanodes_with_timer() {
|
||||
let client = create_meta_peer_client();
|
||||
let in_memory = client.memory_backend();
|
||||
let lease_secs = 10;
|
||||
let timer = MockTimer {
|
||||
current: Arc::new(AtomicI64::new(current_time_millis())),
|
||||
};
|
||||
|
||||
let key = DatanodeLeaseKey { node_id: 2 };
|
||||
let value = LeaseValue {
|
||||
timestamp_millis: timer.current_time_millis(),
|
||||
node_addr: "127.0.0.1:20202".to_string(),
|
||||
workloads: NodeWorkloads::Datanode(DatanodeWorkloads {
|
||||
types: vec![DatanodeWorkloadType::Hybrid as i32],
|
||||
}),
|
||||
};
|
||||
put_lease_value(&in_memory, key.clone(), value.clone()).await;
|
||||
let peers = utils::alive_datanodes(
|
||||
&timer,
|
||||
client.as_ref(),
|
||||
Duration::from_secs(lease_secs as u64),
|
||||
None,
|
||||
@@ -170,12 +233,13 @@ mod tests {
|
||||
let client = create_meta_peer_client();
|
||||
let in_memory = client.memory_backend();
|
||||
let lease_secs = 10;
|
||||
let timer = DefaultSystemTimer;
|
||||
|
||||
// put a lease value for node 1 without mode info
|
||||
let key = DatanodeLeaseKey { node_id: 1 };
|
||||
let value = LeaseValue {
|
||||
// 20s ago
|
||||
timestamp_millis: current_time_millis() - 20 * 1000,
|
||||
timestamp_millis: timer.current_time_millis() - 20 * 1000,
|
||||
node_addr: "127.0.0.1:20201".to_string(),
|
||||
workloads: NodeWorkloads::Datanode(DatanodeWorkloads {
|
||||
types: vec![DatanodeWorkloadType::Hybrid as i32],
|
||||
@@ -186,7 +250,7 @@ mod tests {
|
||||
// put a lease value for node 2 with mode info
|
||||
let key = DatanodeLeaseKey { node_id: 2 };
|
||||
let value = LeaseValue {
|
||||
timestamp_millis: current_time_millis(),
|
||||
timestamp_millis: timer.current_time_millis(),
|
||||
node_addr: "127.0.0.1:20202".to_string(),
|
||||
workloads: NodeWorkloads::Datanode(DatanodeWorkloads {
|
||||
types: vec![DatanodeWorkloadType::Hybrid as i32],
|
||||
@@ -197,7 +261,7 @@ mod tests {
|
||||
// put a lease value for node 3 with mode info
|
||||
let key = DatanodeLeaseKey { node_id: 3 };
|
||||
let value = LeaseValue {
|
||||
timestamp_millis: current_time_millis(),
|
||||
timestamp_millis: timer.current_time_millis(),
|
||||
node_addr: "127.0.0.1:20203".to_string(),
|
||||
workloads: NodeWorkloads::Datanode(DatanodeWorkloads {
|
||||
types: vec![i32::MAX],
|
||||
@@ -208,7 +272,7 @@ mod tests {
|
||||
// put a lease value for node 3 with mode info
|
||||
let key = DatanodeLeaseKey { node_id: 4 };
|
||||
let value = LeaseValue {
|
||||
timestamp_millis: current_time_millis(),
|
||||
timestamp_millis: timer.current_time_millis(),
|
||||
node_addr: "127.0.0.1:20204".to_string(),
|
||||
workloads: NodeWorkloads::Datanode(DatanodeWorkloads {
|
||||
types: vec![i32::MAX],
|
||||
@@ -217,6 +281,7 @@ mod tests {
|
||||
put_lease_value(&in_memory, key, value).await;
|
||||
|
||||
let peers = utils::alive_datanodes(
|
||||
&timer,
|
||||
client.as_ref(),
|
||||
Duration::from_secs(lease_secs),
|
||||
Some(accept_ingest_workload),
|
||||
@@ -227,18 +292,84 @@ mod tests {
|
||||
assert!(peers.contains(&Peer::new(2, "127.0.0.1:20202".to_string())));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_alive_flownodes() {
|
||||
let client = create_meta_peer_client();
|
||||
let in_memory = client.memory_backend();
|
||||
let lease_secs = 10;
|
||||
let timer = DefaultSystemTimer;
|
||||
|
||||
// put a stale lease value for node 1
|
||||
let key = FlownodeLeaseKey { node_id: 1 };
|
||||
let value = LeaseValue {
|
||||
// 20s ago
|
||||
timestamp_millis: timer.current_time_millis() - lease_secs * 2 * 1000,
|
||||
node_addr: "127.0.0.1:20201".to_string(),
|
||||
workloads: NodeWorkloads::Flownode(FlownodeWorkloads { types: vec![] }),
|
||||
};
|
||||
put_flownode_lease_value(&in_memory, key, value).await;
|
||||
|
||||
// put a fresh lease value for node 2
|
||||
let key = FlownodeLeaseKey { node_id: 2 };
|
||||
let value = LeaseValue {
|
||||
timestamp_millis: timer.current_time_millis(),
|
||||
node_addr: "127.0.0.1:20202".to_string(),
|
||||
workloads: NodeWorkloads::Flownode(FlownodeWorkloads { types: vec![] }),
|
||||
};
|
||||
put_flownode_lease_value(&in_memory, key.clone(), value.clone()).await;
|
||||
let peers = utils::alive_flownodes(
|
||||
&timer,
|
||||
client.as_ref(),
|
||||
Duration::from_secs(lease_secs as u64),
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(peers.len(), 1);
|
||||
assert_eq!(peers, vec![Peer::new(2, "127.0.0.1:20202".to_string())]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_alive_flownodes_with_timer() {
|
||||
let client = create_meta_peer_client();
|
||||
let in_memory = client.memory_backend();
|
||||
let lease_secs = 10;
|
||||
let timer = MockTimer {
|
||||
current: Arc::new(AtomicI64::new(current_time_millis())),
|
||||
};
|
||||
|
||||
let key = FlownodeLeaseKey { node_id: 2 };
|
||||
let value = LeaseValue {
|
||||
timestamp_millis: timer.current_time_millis(),
|
||||
node_addr: "127.0.0.1:20202".to_string(),
|
||||
workloads: NodeWorkloads::Flownode(FlownodeWorkloads { types: vec![] }),
|
||||
};
|
||||
put_flownode_lease_value(&in_memory, key.clone(), value.clone()).await;
|
||||
let peers = utils::alive_flownodes(
|
||||
&timer,
|
||||
client.as_ref(),
|
||||
Duration::from_secs(lease_secs as u64),
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(peers.len(), 1);
|
||||
assert_eq!(peers, vec![Peer::new(2, "127.0.0.1:20202".to_string())]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_lookup_frontends() {
|
||||
let client = create_meta_peer_client();
|
||||
let in_memory = client.memory_backend();
|
||||
let lease_secs = 10;
|
||||
let timer = DefaultSystemTimer;
|
||||
|
||||
let active_frontend_node = NodeInfo {
|
||||
peer: Peer {
|
||||
id: 0,
|
||||
addr: "127.0.0.1:20201".to_string(),
|
||||
},
|
||||
last_activity_ts: current_time_millis(),
|
||||
last_activity_ts: timer.current_time_millis(),
|
||||
status: NodeStatus::Frontend(FrontendStatus {}),
|
||||
version: "1.0.0".to_string(),
|
||||
git_commit: "1234567890".to_string(),
|
||||
@@ -266,7 +397,7 @@ mod tests {
|
||||
id: 1,
|
||||
addr: "127.0.0.1:20201".to_string(),
|
||||
},
|
||||
last_activity_ts: current_time_millis() - 20 * 1000,
|
||||
last_activity_ts: timer.current_time_millis() - 20 * 1000,
|
||||
status: NodeStatus::Frontend(FrontendStatus {}),
|
||||
version: "1.0.0".to_string(),
|
||||
git_commit: "1234567890".to_string(),
|
||||
@@ -287,9 +418,52 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let peers = utils::alive_frontends(client.as_ref(), Duration::from_secs(lease_secs))
|
||||
let peers =
|
||||
utils::alive_frontends(&timer, client.as_ref(), Duration::from_secs(lease_secs))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(peers.len(), 1);
|
||||
assert_eq!(peers[0].id, 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_lookup_frontends_with_timer() {
|
||||
let client = create_meta_peer_client();
|
||||
let in_memory = client.memory_backend();
|
||||
let lease_secs = 10;
|
||||
let timer = MockTimer {
|
||||
current: Arc::new(AtomicI64::new(current_time_millis())),
|
||||
};
|
||||
|
||||
let active_frontend_node = NodeInfo {
|
||||
peer: Peer {
|
||||
id: 0,
|
||||
addr: "127.0.0.1:20201".to_string(),
|
||||
},
|
||||
last_activity_ts: timer.current_time_millis(),
|
||||
status: NodeStatus::Frontend(FrontendStatus {}),
|
||||
version: "1.0.0".to_string(),
|
||||
git_commit: "1234567890".to_string(),
|
||||
start_time_ms: current_time_millis() as u64,
|
||||
total_cpu_millicores: 0,
|
||||
total_memory_bytes: 0,
|
||||
cpu_usage_millicores: 0,
|
||||
memory_usage_bytes: 0,
|
||||
hostname: "test_hostname".to_string(),
|
||||
};
|
||||
let key_prefix = NodeInfoKey::key_prefix_with_role(Role::Frontend);
|
||||
in_memory
|
||||
.put(PutRequest {
|
||||
key: format!("{}{}", key_prefix, "0").into(),
|
||||
value: active_frontend_node.try_into().unwrap(),
|
||||
prev_kv: false,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
let peers =
|
||||
utils::alive_frontends(&timer, client.as_ref(), Duration::from_secs(lease_secs))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(peers.len(), 1);
|
||||
assert_eq!(peers[0].id, 0);
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ use common_meta::DatanodeId;
use common_meta::cluster::NodeInfo;
use common_meta::kv_backend::KvBackendRef;
use common_meta::peer::Peer;
use common_time::util::{DefaultSystemTimer, SystemTimer};
use common_time::util::SystemTimer;
use common_workload::DatanodeWorkloadType;
use snafu::ResultExt;

@@ -49,16 +49,9 @@ pub trait LastActiveTs {
/// Builds a filter closure that checks whether a [`LastActiveTs`] item
/// is still within the specified active duration, relative to the
/// current time provided by the given [`SystemTimer`].
///
/// The returned closure uses the timestamp at the time of building,
/// so the "now" reference point is fixed when this function is called.
pub fn build_active_filter<T: LastActiveTs>(
    timer: impl SystemTimer,
    active_duration: Duration,
) -> impl Fn(&T) -> bool {
    let now = timer.current_time_millis();
    let active_duration = active_duration.as_millis() as u64;
    move |item: &T| {
pub fn build_active_filter<T: LastActiveTs>(active_duration: Duration) -> impl Fn(i64, &T) -> bool {
    move |now: i64, item: &T| {
        let active_duration = active_duration.as_millis() as u64;
        let elapsed = now.saturating_sub(item.last_active_ts()) as u64;
        elapsed < active_duration
    }
@@ -66,18 +59,19 @@ pub fn build_active_filter<T: LastActiveTs>(

/// Returns the alive datanodes.
pub async fn alive_datanodes(
    timer: &impl SystemTimer,
    accessor: &impl LeaseValueAccessor,
    active_duration: Duration,
    condition: Option<fn(&NodeWorkloads) -> bool>,
) -> Result<Vec<Peer>> {
    let active_filter = build_active_filter(DefaultSystemTimer, active_duration);
    let active_filter = build_active_filter(active_duration);
    let condition = condition.unwrap_or(|_| true);
    Ok(accessor
        .lease_values(LeaseValueType::Datanode)
        .await?
    let lease_values = accessor.lease_values(LeaseValueType::Datanode).await?;
    let now = timer.current_time_millis();
    Ok(lease_values
        .into_iter()
        .filter_map(|(peer_id, lease_value)| {
            if active_filter(&lease_value) && condition(&lease_value.workloads) {
            if active_filter(now, &lease_value) && condition(&lease_value.workloads) {
                Some(Peer::new(peer_id, lease_value.node_addr))
            } else {
                None
@@ -88,18 +82,19 @@ pub async fn alive_datanodes(

/// Returns the alive flownodes.
pub async fn alive_flownodes(
    timer: &impl SystemTimer,
    accessor: &impl LeaseValueAccessor,
    active_duration: Duration,
    condition: Option<fn(&NodeWorkloads) -> bool>,
) -> Result<Vec<Peer>> {
    let active_filter = build_active_filter(DefaultSystemTimer, active_duration);
    let active_filter = build_active_filter(active_duration);
    let condition = condition.unwrap_or(|_| true);
    Ok(accessor
        .lease_values(LeaseValueType::Flownode)
        .await?
    let lease_values = accessor.lease_values(LeaseValueType::Flownode).await?;
    let now = timer.current_time_millis();
    Ok(lease_values
        .into_iter()
        .filter_map(|(peer_id, lease_value)| {
            if active_filter(&lease_value) && condition(&lease_value.workloads) {
            if active_filter(now, &lease_value) && condition(&lease_value.workloads) {
                Some(Peer::new(peer_id, lease_value.node_addr))
            } else {
                None
@@ -110,16 +105,17 @@ pub async fn alive_flownodes(

/// Returns the alive frontends.
pub async fn alive_frontends(
    timer: &impl SystemTimer,
    lister: &impl NodeInfoAccessor,
    active_duration: Duration,
) -> Result<Vec<Peer>> {
    let active_filter = build_active_filter(DefaultSystemTimer, active_duration);
    Ok(lister
        .node_infos(NodeInfoType::Frontend)
        .await?
    let active_filter = build_active_filter(active_duration);
    let node_infos = lister.node_infos(NodeInfoType::Frontend).await?;
    let now = timer.current_time_millis();
    Ok(node_infos
        .into_iter()
        .filter_map(|(_, node_info)| {
            if active_filter(&node_info) {
            if active_filter(now, &node_info) {
                Some(node_info.peer)
            } else {
                None
@@ -130,15 +126,18 @@ pub async fn alive_frontends(

/// Returns the alive datanode peer.
pub async fn alive_datanode(
    timer: &impl SystemTimer,
    lister: &impl LeaseValueAccessor,
    peer_id: u64,
    active_duration: Duration,
) -> Result<Option<Peer>> {
    let active_filter = build_active_filter(DefaultSystemTimer, active_duration);
    let v = lister
    let active_filter = build_active_filter(active_duration);
    let lease_value = lister
        .lease_value(LeaseValueType::Datanode, peer_id)
        .await?
        .filter(|(_, lease)| active_filter(lease))
        .await?;
    let now = timer.current_time_millis();
    let v = lease_value
        .filter(|(_, lease)| active_filter(now, lease))
        .map(|(peer_id, lease)| Peer::new(peer_id, lease.node_addr));

    Ok(v)

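// Editor's note: a hedged sketch of using the refactored `build_active_filter` above. The
// closure no longer captures "now" at build time; callers pass the current timestamp on every
// check, which is what lets the discovery tests drive it with a mock clock.
use std::time::Duration;

fn count_active<T: LastActiveTs>(items: &[T], now_millis: i64) -> usize {
    // A 10-second liveness window, checked against the caller-supplied "now".
    let is_active = build_active_filter::<T>(Duration::from_secs(10));
    items.iter().filter(|item| is_active(now_millis, item)).count()
}
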
@@ -17,7 +17,7 @@ use std::sync::atomic::{AtomicBool, Ordering};
use std::time::Duration;

use common_meta::key::{CANDIDATES_ROOT, ELECTION_KEY};
use common_telemetry::{error, warn};
use common_telemetry::{error, info, warn};
use common_time::Timestamp;
use snafu::{OptionExt, ResultExt, ensure};
use sqlx::mysql::{MySqlArguments, MySqlRow};
@@ -645,6 +645,13 @@ impl Election for MySqlElection {
    }

    async fn reset_campaign(&self) {
        info!("Resetting campaign");
        if self.is_leader.load(Ordering::Relaxed) {
            if let Err(err) = self.step_down_without_lock().await {
                error!(err; "Failed to step down without lock");
            }
            info!("Step down without lock successfully, due to reset campaign");
        }
        if let Err(err) = self.client.lock().await.reset_client().await {
            error!(err; "Failed to reset client");
        }

@@ -17,7 +17,7 @@ use std::sync::atomic::{AtomicBool, Ordering};
use std::time::Duration;

use common_meta::key::{CANDIDATES_ROOT, ELECTION_KEY};
use common_telemetry::{error, warn};
use common_telemetry::{error, info, warn};
use common_time::Timestamp;
use deadpool_postgres::{Manager, Pool};
use snafu::{OptionExt, ResultExt, ensure};
@@ -477,6 +477,13 @@ impl Election for PgElection {
    }

    async fn reset_campaign(&self) {
        info!("Resetting campaign");
        if self.is_leader.load(Ordering::Relaxed) {
            if let Err(err) = self.step_down_without_lock().await {
                error!(err; "Failed to step down without lock");
            }
            info!("Step down without lock successfully, due to reset campaign");
        }
        if let Err(err) = self.pg_client.write().await.reset_client().await {
            error!(err; "Failed to reset client");
        }
@@ -774,16 +781,12 @@ impl PgElection {
            key: key.clone(),
            ..Default::default()
        };
        if self
            .is_leader
            .compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire)
            .is_ok()
            && let Err(e) = self
                .leader_watcher
                .send(LeaderChangeMessage::StepDown(Arc::new(leader_key)))
        {
            error!(e; "Failed to send leader change message");
        }
        send_leader_change_and_set_flags(
            &self.is_leader,
            &self.leader_infancy,
            &self.leader_watcher,
            LeaderChangeMessage::StepDown(Arc::new(leader_key)),
        );
        Ok(())
    }

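// Editor's note: a hedged, self-contained sketch of the step-down flag handling that the
// `send_leader_change_and_set_flags` helper in the hunk above centralizes (its real signature
// is not shown in full here, so this only mirrors the pattern the old code used): atomically
// flip the leader flag and notify watchers at most once, even if several code paths race.
use std::sync::atomic::{AtomicBool, Ordering};

fn step_down(is_leader: &AtomicBool, notify: impl FnOnce()) {
    // compare_exchange guarantees the StepDown notification is sent only by the code path
    // that actually observed the transition from leader to non-leader.
    if is_leader
        .compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire)
        .is_ok()
    {
        notify();
    }
}
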
@@ -62,10 +62,12 @@ pub(crate) struct RegionMigrationEvent {

impl RegionMigrationEvent {
    pub fn from_persistent_ctx(ctx: &PersistentContext) -> Self {
        // FIXME(weny): handle multiple region ids.
        let region_id = ctx.region_ids[0];
        Self {
            region_id: ctx.region_id,
            table_id: ctx.region_id.table_id(),
            region_number: ctx.region_id.region_number(),
            region_id,
            table_id: region_id.table_id(),
            region_number: region_id.region_number(),
            trigger_reason: ctx.trigger_reason,
            src_node_id: ctx.from_peer.id,
            src_peer_addr: ctx.from_peer.addr.clone(),

@@ -19,6 +19,7 @@ use api::v1::meta::{HeartbeatRequest, RegionLease, Role};
use async_trait::async_trait;
use common_meta::key::TableMetadataManagerRef;
use common_meta::region_keeper::MemoryRegionKeeperRef;
use common_telemetry::error;
use store_api::region_engine::GrantedRegion;
use store_api::storage::RegionId;
@@ -83,36 +84,44 @@ impl HeartbeatHandler for RegionLeaseHandler {
        let regions = stat.regions();
        let datanode_id = stat.id;

        let RenewRegionLeasesResponse {
            non_exists,
            renewed,
        } = self
        match self
            .region_lease_keeper
            .renew_region_leases(datanode_id, &regions)
            .await?;
            .await
        {
            Ok(RenewRegionLeasesResponse {
                non_exists,
                renewed,
            }) => {
                let renewed = if let Some(renewer) = &self.customized_region_lease_renewer {
                    renewer
                        .renew(ctx, renewed)
                        .into_iter()
                        .map(|region| region.into())
                        .collect()
                } else {
                    renewed
                        .into_iter()
                        .map(|(region_id, region_lease_info)| {
                            GrantedRegion::new(region_id, region_lease_info.role).into()
                        })
                        .collect::<Vec<_>>()
                };

        let renewed = if let Some(renewer) = &self.customized_region_lease_renewer {
            renewer
                .renew(ctx, renewed)
                .into_iter()
                .map(|region| region.into())
                .collect()
        } else {
            renewed
                .into_iter()
                .map(|(region_id, region_lease_info)| {
                    GrantedRegion::new(region_id, region_lease_info.role).into()
                })
                .collect::<Vec<_>>()
        };

        acc.region_lease = Some(RegionLease {
            regions: renewed,
            duration_since_epoch: req.duration_since_epoch,
            lease_seconds: self.region_lease_seconds,
            closeable_region_ids: non_exists.iter().map(|region| region.as_u64()).collect(),
        });
        acc.inactive_region_ids = non_exists;
                acc.region_lease = Some(RegionLease {
                    regions: renewed,
                    duration_since_epoch: req.duration_since_epoch,
                    lease_seconds: self.region_lease_seconds,
                    closeable_region_ids: non_exists.iter().map(|region| region.as_u64()).collect(),
                });
                acc.inactive_region_ids = non_exists;
            }
            Err(e) => {
                error!(e; "Failed to renew region leases for datanode: {datanode_id:?}, regions: {:?}", regions);
                // If we throw an error here, the datanode will be marked as failed by the region failure handler.
                // So we only log the error and continue.
            }
        }

        Ok(HandleControl::Continue)
    }

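// Editor's note: a hedged sketch of the control-flow change in the hunk above. Instead of
// `?`, which would bubble the error up and let the failure detector treat the datanode as
// failed, the handler now matches on the result, logs, and keeps the heartbeat loop going.
// The function below is only an illustration of that "log and continue" shape.
fn renew_or_log<T, E: std::fmt::Display>(result: Result<T, E>, datanode_id: u64) -> Option<T> {
    match result {
        Ok(renewed) => Some(renewed),
        Err(e) => {
            // Only log; returning None lets the caller continue handling the heartbeat.
            eprintln!("failed to renew region leases for datanode {datanode_id}: {e}");
            None
        }
    }
}
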
@@ -49,6 +49,7 @@ use common_procedure::options::ProcedureConfig;
use common_stat::ResourceStatRef;
use common_telemetry::logging::{LoggingOptions, TracingOptions};
use common_telemetry::{error, info, warn};
use common_time::util::DefaultSystemTimer;
use common_wal::config::MetasrvWalConfig;
use serde::{Deserialize, Serialize};
use servers::export_metrics::ExportMetricsOption;
@@ -375,12 +376,14 @@ pub struct MetasrvNodeInfo {
    // The node total cpu millicores
    #[serde(default)]
    pub total_cpu_millicores: i64,
    #[serde(default)]
    // The node total memory bytes
    #[serde(default)]
    pub total_memory_bytes: i64,
    /// The node cpu usage in millicores
    #[serde(default)]
    pub cpu_usage_millicores: i64,
    /// The node memory usage in bytes
    #[serde(default)]
    pub memory_usage_bytes: i64,
    // The node hostname
    #[serde(default)]
@@ -733,6 +736,7 @@ impl Metasrv {
    /// A datanode is considered alive when it's still within the lease period.
    pub(crate) async fn lookup_datanode_peer(&self, peer_id: u64) -> Result<Option<Peer>> {
        discovery::utils::alive_datanode(
            &DefaultSystemTimer,
            self.meta_peer_client.as_ref(),
            peer_id,
            Duration::from_secs(distributed_time_constants::DATANODE_LEASE_SECS),
@@ -858,3 +862,18 @@ impl Metasrv {
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::metasrv::MetasrvNodeInfo;

    #[test]
    fn test_deserialize_metasrv_node_info() {
        let str = r#"{"addr":"127.0.0.1:4002","version":"0.1.0","git_commit":"1234567890","start_time_ms":1715145600}"#;
        let node_info: MetasrvNodeInfo = serde_json::from_str(str).unwrap();
        assert_eq!(node_info.addr, "127.0.0.1:4002");
        assert_eq!(node_info.version, "0.1.0");
        assert_eq!(node_info.git_commit, "1234567890");
        assert_eq!(node_info.start_time_ms, 1715145600);
    }
}

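// Editor's note: a hedged, self-contained illustration of why the test above deserializes
// successfully even though the JSON payload omits the new resource fields: fields annotated
// with #[serde(default)] fall back to Default when missing. Assumes serde (with the derive
// feature) and serde_json are available; the struct below is a stripped-down stand-in, not
// the real MetasrvNodeInfo.
use serde::Deserialize;

#[derive(Deserialize, Debug)]
struct NodeInfoLite {
    addr: String,
    #[serde(default)]
    total_cpu_millicores: i64,
}

fn main() {
    let v: NodeInfoLite = serde_json::from_str(r#"{"addr":"127.0.0.1:4002"}"#).unwrap();
    assert_eq!(v.total_cpu_millicores, 0); // defaulted, not a deserialization error
    println!("{v:?}");
}
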