Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2026-01-03 20:02:54 +00:00)

Compare commits: release/v0 ... v0.17.0-ni (72 commits)
| SHA1 |
|---|
| 4fb7d92f7c |
| 8659412cac |
| dea87b7e57 |
| a678b4dfd6 |
| ccccaf7734 |
| f0bec4940f |
| 5eb491df12 |
| 1d84e802d8 |
| 2992e70393 |
| 8a44137f37 |
| 777da35b0d |
| 9ad9a7d2bc |
| ff5d672583 |
| e495c614f7 |
| e80e4a9ed7 |
| 1977ae50ee |
| 5cec0d4e3a |
| d2d6489b2f |
| 25f926ea7d |
| f159fcf599 |
| e4454e0c7d |
| 0781adaa3d |
| 253d89b5cc |
| 3a2f5413e0 |
| 214ffe7109 |
| 3b1f172ab8 |
| 0215b39f61 |
| 01dc789816 |
| bbe48e9e8b |
| e2015ce1af |
| 7bb765af1d |
| 080b4b5d53 |
| c7c8495a6b |
| bbab35f285 |
| 6c6487ab30 |
| 757694ae38 |
| 39e2f122eb |
| 877ce6e893 |
| c8da35c7e5 |
| 309e9d978c |
| 3a9f0220b5 |
| cc35bab5e4 |
| 414db41219 |
| ea024874e7 |
| e64469bbc4 |
| 875207d26c |
| 9871c22740 |
| 50f7f61fdc |
| 9c3b83e84d |
| e81d0f5861 |
| 29e0092468 |
| 67a93a07a2 |
| 1afa0afc67 |
| 414101fafa |
| 280024d7f8 |
| 865ca44dbd |
| a3e55565dc |
| bed0c1e55f |
| 572e29b158 |
| 31cb769507 |
| e19493db4a |
| 9817eb934d |
| 8639961cc9 |
| a9cd117706 |
| 9485dbed64 |
| 21b71d1e10 |
| cfaa9b4dda |
| 19ad9a7f85 |
| 9e2f793b04 |
| 52466fdd92 |
| 869f8bf68a |
| 9527e0df2f |
@@ -12,7 +12,7 @@ runs:
steps:
- name: Install Etcd cluster
shell: bash
run: |
run: |
helm upgrade \
--install etcd oci://registry-1.docker.io/bitnamicharts/etcd \
--set replicaCount=${{ inputs.etcd-replicas }} \
@@ -24,9 +24,4 @@ runs:
--set auth.rbac.token.enabled=false \
--set persistence.size=2Gi \
--create-namespace \
--set global.security.allowInsecureImages=true \
--set image.registry=docker.io \
--set image.repository=greptime/etcd \
--set image.tag=3.6.1-debian-12-r3 \
--version 12.0.8 \
-n ${{ inputs.namespace }}

@@ -51,7 +51,7 @@ runs:
run: |
helm upgrade \
--install my-greptimedb \
--set 'meta.backendStorage.etcd.endpoints[0]=${{ inputs.etcd-endpoints }}' \
--set meta.backendStorage.etcd.endpoints=${{ inputs.etcd-endpoints }} \
--set meta.enableRegionFailover=${{ inputs.enable-region-failover }} \
--set image.registry=${{ inputs.image-registry }} \
--set image.repository=${{ inputs.image-repository }} \

@@ -12,7 +12,7 @@ runs:
steps:
- name: Install Kafka cluster
shell: bash
run: |
run: |
helm upgrade \
--install kafka oci://registry-1.docker.io/bitnamicharts/kafka \
--set controller.replicaCount=${{ inputs.controller-replicas }} \
@@ -23,8 +23,4 @@ runs:
--set listeners.controller.protocol=PLAINTEXT \
--set listeners.client.protocol=PLAINTEXT \
--create-namespace \
--set image.registry=docker.io \
--set image.repository=greptime/kafka \
--set image.tag=3.9.0-debian-12-r1 \
--version 31.0.0 \
-n ${{ inputs.namespace }}

@@ -6,7 +6,9 @@ inputs:
description: "Number of PostgreSQL replicas"
namespace:
default: "postgres-namespace"
description: "The PostgreSQL namespace"
postgres-version:
default: "14.2"
description: "PostgreSQL version"
storage-size:
default: "1Gi"
description: "Storage size for PostgreSQL"
@@ -20,11 +22,7 @@ runs:
helm upgrade \
--install postgresql oci://registry-1.docker.io/bitnamicharts/postgresql \
--set replicaCount=${{ inputs.postgres-replicas }} \
--set global.security.allowInsecureImages=true \
--set image.registry=docker.io \
--set image.repository=greptime/postgresql \
--set image.tag=17.5.0-debian-12-r3 \
--version 16.7.4 \
--set image.tag=${{ inputs.postgres-version }} \
--set persistence.size=${{ inputs.storage-size }} \
--set postgresql.username=greptimedb \
--set postgresql.password=admin \
5 .github/scripts/deploy-greptimedb.sh (vendored)
@@ -68,7 +68,7 @@ function deploy_greptimedb_cluster() {

helm install "$cluster_name" greptime/greptimedb-cluster \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
--set meta.backendStorage.etcd.endpoints[0]="etcd.$install_namespace:2379" \
--set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
-n "$install_namespace"

# Wait for greptimedb cluster to be ready.
@@ -103,13 +103,14 @@ function deploy_greptimedb_cluster_with_s3_storage() {

helm install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
--set meta.backendStorage.etcd.endpoints[0]="etcd.$install_namespace:2379" \
--set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
--set storage.s3.bucket="$AWS_CI_TEST_BUCKET" \
--set storage.s3.region="$AWS_REGION" \
--set storage.s3.root="$DATA_ROOT" \
--set storage.credentials.secretName=s3-credentials \
--set storage.credentials.accessKeyId="$AWS_ACCESS_KEY_ID" \
--set storage.credentials.secretAccessKey="$AWS_SECRET_ACCESS_KEY"

# Wait for greptimedb cluster to be ready.
while true; do
PHASE=$(kubectl -n "$install_namespace" get gtc "$cluster_name" -o jsonpath='{.status.clusterPhase}')
34 .github/scripts/pull-test-deps-images.sh (vendored)
@@ -1,34 +0,0 @@
#!/bin/bash

# This script is used to pull the test dependency images that are stored in public ECR one by one to avoid rate limiting.

set -e

MAX_RETRIES=3

IMAGES=(
  "greptime/zookeeper:3.7"
  "greptime/kafka:3.9.0-debian-12-r1"
  "greptime/etcd:3.6.1-debian-12-r3"
  "greptime/minio:2024"
  "greptime/mysql:5.7"
)

for image in "${IMAGES[@]}"; do
  for ((attempt=1; attempt<=MAX_RETRIES; attempt++)); do
    if docker pull "$image"; then
      # Successfully pulled the image.
      break
    else
      # Use some simple exponential backoff to avoid rate limiting.
      if [ $attempt -lt $MAX_RETRIES ]; then
        sleep_seconds=$((attempt * 5))
        echo "Attempt $attempt failed for $image, waiting $sleep_seconds seconds"
        sleep $sleep_seconds # 5s, 10s delays
      else
        echo "Failed to pull $image after $MAX_RETRIES attempts"
        exit 1
      fi
    fi
  done
done
4 .github/workflows/develop.yml (vendored)
@@ -719,10 +719,6 @@ jobs:
save-if: ${{ github.ref == 'refs/heads/main' }}
- name: Install latest nextest release
uses: taiki-e/install-action@nextest

- name: Pull test dependencies images
run: ./.github/scripts/pull-test-deps-images.sh

- name: Setup external services
working-directory: tests-integration/fixtures
run: docker compose up -d --wait
240 Cargo.lock (generated)
@@ -217,7 +217,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"
|
||||
|
||||
[[package]]
|
||||
name = "api"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-decimal",
|
||||
@@ -950,7 +950,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "auth"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -1617,7 +1617,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cache"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"catalog",
|
||||
"common-error",
|
||||
@@ -1652,7 +1652,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
||||
|
||||
[[package]]
|
||||
name = "catalog"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow 54.2.1",
|
||||
@@ -1666,6 +1666,7 @@ dependencies = [
|
||||
"common-base",
|
||||
"common-catalog",
|
||||
"common-error",
|
||||
"common-event-recorder",
|
||||
"common-frontend",
|
||||
"common-macro",
|
||||
"common-meta",
|
||||
@@ -1992,7 +1993,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
|
||||
|
||||
[[package]]
|
||||
name = "cli"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@@ -2036,7 +2037,7 @@ dependencies = [
|
||||
"session",
|
||||
"snafu 0.8.5",
|
||||
"store-api",
|
||||
"substrait 0.16.0",
|
||||
"substrait 0.17.0",
|
||||
"table",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
@@ -2045,7 +2046,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "client"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -2075,7 +2076,7 @@ dependencies = [
|
||||
"rand 0.9.0",
|
||||
"serde_json",
|
||||
"snafu 0.8.5",
|
||||
"substrait 0.16.0",
|
||||
"substrait 0.17.0",
|
||||
"substrait 0.37.3",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
@@ -2116,7 +2117,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cmd"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"auth",
|
||||
@@ -2178,7 +2179,7 @@ dependencies = [
|
||||
"snafu 0.8.5",
|
||||
"stat",
|
||||
"store-api",
|
||||
"substrait 0.16.0",
|
||||
"substrait 0.17.0",
|
||||
"table",
|
||||
"temp-env",
|
||||
"tempfile",
|
||||
@@ -2225,7 +2226,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
|
||||
|
||||
[[package]]
|
||||
name = "common-base"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"anymap2",
|
||||
"async-trait",
|
||||
@@ -2247,11 +2248,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-catalog"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
|
||||
[[package]]
|
||||
name = "common-config"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-error",
|
||||
@@ -2277,7 +2278,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-datasource"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"arrow 54.2.1",
|
||||
"arrow-schema 54.3.1",
|
||||
@@ -2314,7 +2315,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-decimal"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"bigdecimal 0.4.8",
|
||||
"common-error",
|
||||
@@ -2327,7 +2328,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-error"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"common-macro",
|
||||
"http 1.1.0",
|
||||
@@ -2338,7 +2339,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-event-recorder"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -2347,6 +2348,9 @@ dependencies = [
|
||||
"common-macro",
|
||||
"common-telemetry",
|
||||
"common-time",
|
||||
"humantime",
|
||||
"humantime-serde",
|
||||
"itertools 0.14.0",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"snafu 0.8.5",
|
||||
@@ -2357,15 +2361,20 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-frontend"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
"common-error",
|
||||
"common-event-recorder",
|
||||
"common-grpc",
|
||||
"common-macro",
|
||||
"common-meta",
|
||||
"common-time",
|
||||
"greptime-proto",
|
||||
"humantime",
|
||||
"meta-client",
|
||||
"serde",
|
||||
"session",
|
||||
"snafu 0.8.5",
|
||||
"tokio",
|
||||
@@ -2374,7 +2383,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-function"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"api",
|
||||
@@ -2433,7 +2442,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-greptimedb-telemetry"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-runtime",
|
||||
@@ -2450,7 +2459,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-grpc"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -2483,7 +2492,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-grpc-expr"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"common-base",
|
||||
@@ -2503,7 +2512,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-macro"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"common-query",
|
||||
@@ -2517,7 +2526,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-mem-prof"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"common-error",
|
||||
@@ -2533,7 +2542,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-meta"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"anymap2",
|
||||
"api",
|
||||
@@ -2604,7 +2613,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-options"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"common-grpc",
|
||||
"humantime-serde",
|
||||
@@ -2613,11 +2622,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-plugins"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
|
||||
[[package]]
|
||||
name = "common-pprof"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"common-error",
|
||||
"common-macro",
|
||||
@@ -2629,7 +2638,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-procedure"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-stream",
|
||||
@@ -2658,7 +2667,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-procedure-test"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-procedure",
|
||||
@@ -2668,7 +2677,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-query"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -2694,7 +2703,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-recordbatch"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"common-error",
|
||||
@@ -2715,7 +2724,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-runtime"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"clap 4.5.19",
|
||||
@@ -2745,14 +2754,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-session"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"strum 0.27.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "common-sql"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-datasource",
|
||||
@@ -2771,7 +2781,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-telemetry"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"common-error",
|
||||
@@ -2799,7 +2809,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-test-util"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"client",
|
||||
"common-grpc",
|
||||
@@ -2812,7 +2822,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-time"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"arrow 54.2.1",
|
||||
"chrono",
|
||||
@@ -2830,7 +2840,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-version"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"build-data",
|
||||
"cargo-manifest",
|
||||
@@ -2841,7 +2851,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-wal"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-error",
|
||||
@@ -2864,7 +2874,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-workload"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"common-telemetry",
|
||||
@@ -3863,14 +3873,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datanode"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"cache",
|
||||
"catalog",
|
||||
"client",
|
||||
"common-base",
|
||||
"common-config",
|
||||
@@ -3917,7 +3926,7 @@ dependencies = [
|
||||
"session",
|
||||
"snafu 0.8.5",
|
||||
"store-api",
|
||||
"substrait 0.16.0",
|
||||
"substrait 0.17.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"toml 0.8.19",
|
||||
@@ -3927,7 +3936,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datatypes"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"arrow 54.2.1",
|
||||
"arrow-array 54.2.1",
|
||||
@@ -4602,7 +4611,7 @@ checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6"
|
||||
|
||||
[[package]]
|
||||
name = "file-engine"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -4739,7 +4748,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
|
||||
|
||||
[[package]]
|
||||
name = "flow"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow 54.2.1",
|
||||
@@ -4806,7 +4815,7 @@ dependencies = [
|
||||
"sql",
|
||||
"store-api",
|
||||
"strum 0.27.1",
|
||||
"substrait 0.16.0",
|
||||
"substrait 0.17.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"tonic 0.12.3",
|
||||
@@ -4861,7 +4870,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"
|
||||
|
||||
[[package]]
|
||||
name = "frontend"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -4877,6 +4886,7 @@ dependencies = [
|
||||
"common-config",
|
||||
"common-datasource",
|
||||
"common-error",
|
||||
"common-event-recorder",
|
||||
"common-frontend",
|
||||
"common-function",
|
||||
"common-grpc",
|
||||
@@ -4897,6 +4907,7 @@ dependencies = [
|
||||
"datanode",
|
||||
"datatypes",
|
||||
"futures",
|
||||
"humantime",
|
||||
"humantime-serde",
|
||||
"lazy_static",
|
||||
"log-query",
|
||||
@@ -4922,7 +4933,7 @@ dependencies = [
|
||||
"sqlparser 0.54.0-greptime",
|
||||
"store-api",
|
||||
"strfmt",
|
||||
"substrait 0.16.0",
|
||||
"substrait 0.17.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
@@ -5313,7 +5324,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "greptime-proto"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=ccfd4da48bc0254ed865e479cd981a3581b02d84#ccfd4da48bc0254ed865e479cd981a3581b02d84"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=69680846a078aae670d93fb30511a72738345199#69680846a078aae670d93fb30511a72738345199"
|
||||
dependencies = [
|
||||
"prost 0.13.5",
|
||||
"serde",
|
||||
@@ -6107,7 +6118,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "index"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"asynchronous-codec",
|
||||
@@ -6132,6 +6143,7 @@ dependencies = [
|
||||
"prost 0.13.5",
|
||||
"puffin",
|
||||
"rand 0.9.0",
|
||||
"rand_chacha 0.9.0",
|
||||
"regex",
|
||||
"regex-automata 0.4.8",
|
||||
"roaring",
|
||||
@@ -6870,9 +6882,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.178"
|
||||
version = "0.2.171"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
|
||||
checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
|
||||
|
||||
[[package]]
|
||||
name = "libflate"
|
||||
@@ -6928,7 +6940,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-targets 0.48.5",
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7005,13 +7017,13 @@ checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5"
|
||||
|
||||
[[package]]
|
||||
name = "local-ip-address"
|
||||
version = "0.6.5"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "656b3b27f8893f7bbf9485148ff9a65f019e3f33bd5cdc87c83cab16b3fd9ec8"
|
||||
checksum = "3669cf5561f8d27e8fc84cc15e58350e70f557d4d65f70e3154e54cd2f8e1782"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"neli",
|
||||
"thiserror 2.0.12",
|
||||
"thiserror 1.0.64",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
@@ -7033,7 +7045,7 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
|
||||
|
||||
[[package]]
|
||||
name = "log-query"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"common-error",
|
||||
@@ -7045,7 +7057,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "log-store"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@@ -7287,6 +7299,12 @@ version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
||||
|
||||
[[package]]
|
||||
name = "md5"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae960838283323069879657ca3de837e9f7bbb4c7bf6ea7f1b290d5e9476d2e0"
|
||||
|
||||
[[package]]
|
||||
name = "measure_time"
|
||||
version = "0.9.0"
|
||||
@@ -7342,7 +7360,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meta-client"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -7370,7 +7388,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meta-srv"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -7468,7 +7486,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "metric-engine"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -7493,6 +7511,7 @@ dependencies = [
|
||||
"lazy_static",
|
||||
"mito-codec",
|
||||
"mito2",
|
||||
"moka",
|
||||
"mur3",
|
||||
"object-store",
|
||||
"prometheus",
|
||||
@@ -7560,7 +7579,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mito-codec"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"bytes",
|
||||
@@ -7583,7 +7602,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mito2"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -8336,7 +8355,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "object-store"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
@@ -8348,7 +8367,7 @@ dependencies = [
|
||||
"futures",
|
||||
"humantime-serde",
|
||||
"lazy_static",
|
||||
"md5",
|
||||
"md5 0.7.0",
|
||||
"moka",
|
||||
"opendal",
|
||||
"prometheus",
|
||||
@@ -8672,7 +8691,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "operator"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"api",
|
||||
@@ -8728,7 +8747,7 @@ dependencies = [
|
||||
"sql",
|
||||
"sqlparser 0.54.0-greptime",
|
||||
"store-api",
|
||||
"substrait 0.16.0",
|
||||
"substrait 0.17.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
@@ -8987,7 +9006,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "partition"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -9181,8 +9200,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pgwire"
|
||||
version = "0.30.2"
|
||||
source = "git+https://github.com/sunng87/pgwire?rev=127573d997228cfb70c7699881c568eae8131270#127573d997228cfb70c7699881c568eae8131270"
|
||||
version = "0.32.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ddf403a6ee31cf7f2217b2bd8447cb13dbb6c268d7e81501bc78a4d3daafd294"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
@@ -9191,7 +9211,7 @@ dependencies = [
|
||||
"futures",
|
||||
"hex",
|
||||
"lazy-regex",
|
||||
"md5",
|
||||
"md5 0.8.0",
|
||||
"postgres-types",
|
||||
"rand 0.9.0",
|
||||
"ring",
|
||||
@@ -9314,7 +9334,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "pipeline"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"api",
|
||||
@@ -9458,7 +9478,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "plugins"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"auth",
|
||||
"clap 4.5.19",
|
||||
@@ -9771,7 +9791,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "promql"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"async-trait",
|
||||
@@ -10054,7 +10074,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "puffin"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"async-compression 0.4.13",
|
||||
"async-trait",
|
||||
@@ -10096,7 +10116,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "query"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"api",
|
||||
@@ -10162,7 +10182,7 @@ dependencies = [
|
||||
"sqlparser 0.54.0-greptime",
|
||||
"statrs",
|
||||
"store-api",
|
||||
"substrait 0.16.0",
|
||||
"substrait 0.17.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
@@ -10849,7 +10869,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "rskafka"
|
||||
version = "0.6.0"
|
||||
source = "git+https://github.com/WenyXu/rskafka.git?rev=9494304ae3947b07e660b5d08549ad4a39c84a26#9494304ae3947b07e660b5d08549ad4a39c84a26"
|
||||
source = "git+https://github.com/WenyXu/rskafka.git?rev=bc582e98918def613a882581a1b9331d186d9b2d#bc582e98918def613a882581a1b9331d186d9b2d"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"chrono",
|
||||
@@ -11515,7 +11535,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "servers"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"api",
|
||||
@@ -11639,7 +11659,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "session"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"api",
|
||||
@@ -11979,7 +11999,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sql"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"chrono",
|
||||
@@ -12036,7 +12056,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlness-runner"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"clap 4.5.19",
|
||||
@@ -12123,9 +12143,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlx"
|
||||
version = "0.8.3"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4410e73b3c0d8442c5f99b425d7a435b5ee0ae4167b3196771dd3f7a01be745f"
|
||||
checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc"
|
||||
dependencies = [
|
||||
"sqlx-core",
|
||||
"sqlx-macros",
|
||||
@@ -12136,10 +12156,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlx-core"
|
||||
version = "0.8.3"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a007b6936676aa9ab40207cde35daab0a04b823be8ae004368c0793b96a61e0"
|
||||
checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"crc",
|
||||
@@ -12158,7 +12179,6 @@ dependencies = [
|
||||
"once_cell",
|
||||
"percent-encoding",
|
||||
"rustls",
|
||||
"rustls-pemfile",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
@@ -12173,9 +12193,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlx-macros"
|
||||
version = "0.8.3"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3112e2ad78643fef903618d78cf0aec1cb3134b019730edb039b69eaf531f310"
|
||||
checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -12186,9 +12206,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlx-macros-core"
|
||||
version = "0.8.3"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e9f90acc5ab146a99bf5061a7eb4976b573f560bc898ef3bf8435448dd5e7ad"
|
||||
checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b"
|
||||
dependencies = [
|
||||
"dotenvy",
|
||||
"either",
|
||||
@@ -12205,16 +12225,15 @@ dependencies = [
|
||||
"sqlx-postgres",
|
||||
"sqlx-sqlite",
|
||||
"syn 2.0.100",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlx-mysql"
|
||||
version = "0.8.3"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4560278f0e00ce64938540546f59f590d60beee33fffbd3b9cd47851e5fff233"
|
||||
checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526"
|
||||
dependencies = [
|
||||
"atoi",
|
||||
"base64 0.22.1",
|
||||
@@ -12255,9 +12274,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlx-postgres"
|
||||
version = "0.8.3"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c5b98a57f363ed6764d5b3a12bfedf62f07aa16e1856a7ddc2a0bb190a959613"
|
||||
checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46"
|
||||
dependencies = [
|
||||
"atoi",
|
||||
"base64 0.22.1",
|
||||
@@ -12293,9 +12312,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlx-sqlite"
|
||||
version = "0.8.3"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f85ca71d3a5b24e64e1d08dd8fe36c6c95c339a896cc33068148906784620540"
|
||||
checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea"
|
||||
dependencies = [
|
||||
"atoi",
|
||||
"chrono",
|
||||
@@ -12311,6 +12330,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_urlencoded",
|
||||
"sqlx-core",
|
||||
"thiserror 2.0.12",
|
||||
"tracing",
|
||||
"url",
|
||||
]
|
||||
@@ -12331,12 +12351,12 @@ dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"psm",
|
||||
"windows-sys 0.52.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stat"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"nix 0.30.1",
|
||||
]
|
||||
@@ -12362,7 +12382,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "store-api"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -12384,6 +12404,7 @@ dependencies = [
|
||||
"futures",
|
||||
"humantime",
|
||||
"lazy_static",
|
||||
"num_enum 0.7.3",
|
||||
"prometheus",
|
||||
"prost 0.13.5",
|
||||
"serde",
|
||||
@@ -12525,7 +12546,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "substrait"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
@@ -12726,7 +12747,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "table"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -12995,7 +13016,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
|
||||
|
||||
[[package]]
|
||||
name = "tests-fuzz"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"async-trait",
|
||||
@@ -13039,7 +13060,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tests-integration"
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -13058,6 +13079,7 @@ dependencies = [
|
||||
"common-config",
|
||||
"common-error",
|
||||
"common-event-recorder",
|
||||
"common-frontend",
|
||||
"common-grpc",
|
||||
"common-meta",
|
||||
"common-procedure",
|
||||
@@ -13109,7 +13131,7 @@ dependencies = [
|
||||
"sql",
|
||||
"sqlx",
|
||||
"store-api",
|
||||
"substrait 0.16.0",
|
||||
"substrait 0.17.0",
|
||||
"table",
|
||||
"tempfile",
|
||||
"time",
|
||||
@@ -14598,7 +14620,7 @@ version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
|
||||
dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -73,7 +73,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.16.0"
version = "0.17.0"
edition = "2021"
license = "Apache-2.0"

@@ -140,7 +140,7 @@ etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "ccfd4da48bc0254ed865e479cd981a3581b02d84" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "69680846a078aae670d93fb30511a72738345199" }
hex = "0.4"
http = "1"
humantime = "2.1"
@@ -188,8 +188,7 @@ reqwest = { version = "0.12", default-features = false, features = [
"stream",
"multipart",
] }
# Branch: feat/request-timeout-port
rskafka = { git = "https://github.com/WenyXu/rskafka.git", rev = "9494304ae3947b07e660b5d08549ad4a39c84a26", features = [
rskafka = { git = "https://github.com/WenyXu/rskafka.git", rev = "bc582e98918def613a882581a1b9331d186d9b2d", features = [
"transport-tls",
] }
rstest = "0.25"
@@ -78,8 +78,6 @@
|
||||
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. |
|
||||
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.connect_timeout` | String | `3s` | The connect timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.timeout` | String | `3s` | The timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
|
||||
| `wal.num_topics` | Integer | `64` | Number of topics.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default)<br/>**It's only used when the provider is `kafka`**. |
|
||||
@@ -277,6 +275,7 @@
|
||||
| `meta_client` | -- | -- | The metasrv client options. |
|
||||
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
|
||||
| `meta_client.timeout` | String | `3s` | Operation timeout. |
|
||||
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
|
||||
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
|
||||
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
|
||||
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
|
||||
@@ -306,7 +305,7 @@
|
||||
| `slow_query.record_type` | String | `system_table` | The record type of slow queries. It can be `system_table` or `log`.<br/>If `system_table` is selected, the slow queries will be recorded in a system table `greptime_private.slow_queries`.<br/>If `log` is selected, the slow queries will be logged in a log file `greptimedb-slow-queries.*`. |
|
||||
| `slow_query.threshold` | String | `30s` | The threshold of slow query. It can be human readable time string, for example: `10s`, `100ms`, `1s`. |
|
||||
| `slow_query.sample_ratio` | Float | `1.0` | The sampling ratio of slow query log. The value should be in the range of (0, 1]. For example, `0.1` means 10% of the slow queries will be logged and `1.0` means all slow queries will be logged. |
|
||||
| `slow_query.ttl` | String | `30d` | The TTL of the `slow_queries` system table. Default is `30d` when `record_type` is `system_table`. |
|
||||
| `slow_query.ttl` | String | `90d` | The TTL of the `slow_queries` system table. Default is `90d` when `record_type` is `system_table`. |
|
||||
| `export_metrics` | -- | -- | The frontend can export its metrics and send to Prometheus compatible service (e.g. `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
|
||||
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
|
||||
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
|
||||
@@ -317,6 +316,8 @@
|
||||
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
|
||||
| `memory` | -- | -- | The memory options. |
|
||||
| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.<br/>When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable<br/>is set to "prof:true,prof_active:false". The official image adds this env variable.<br/>Default is true. |
|
||||
| `event_recorder` | -- | -- | Configuration options for the event recorder. |
|
||||
| `event_recorder.ttl` | String | `90d` | TTL for the events table that will be used to store the events. Default is `90d`. |
|
||||
|
||||
|
||||
### Metasrv
|
||||
@@ -335,7 +336,6 @@
|
||||
| `region_failure_detector_initialization_delay` | String | `10m` | The delay before starting region failure detection.<br/>This delay helps prevent Metasrv from triggering unnecessary region failovers before all Datanodes are fully started.<br/>Especially useful when the cluster is not deployed with GreptimeDB Operator and maintenance mode is not enabled. |
|
||||
| `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.<br/>**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
|
||||
| `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
|
||||
| `heartbeat_interval` | String | `3s` | Base heartbeat interval for calculating distributed time constants.<br/>The frontend heartbeat interval is 6 times of the base heartbeat interval.<br/>The flownode/datanode heartbeat interval is 1 times of the base heartbeat interval.<br/>e.g., If the base heartbeat interval is 3s, the frontend heartbeat interval is 18s, the flownode/datanode heartbeat interval is 3s.<br/>If you change this value, you need to change the heartbeat interval of the flownode/frontend/datanode accordingly. |
|
||||
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
|
||||
| `runtime` | -- | -- | The runtime options. |
|
||||
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
|
||||
@@ -346,18 +346,12 @@
|
||||
| `backend_tls.key_path` | String | `""` | Path to client private key file (for client authentication)<br/>Like "/path/to/client.key" |
|
||||
| `backend_tls.ca_cert_path` | String | `""` | Path to CA certificate file (for server certificate verification)<br/>Required when using custom CAs or self-signed certificates<br/>Leave empty to use system root certificates only<br/>Like "/path/to/ca.crt" |
|
||||
| `backend_tls.watch` | Bool | `false` | Watch for certificate file changes and auto reload |
|
||||
| `backend_client` | -- | -- | The backend client options.<br/>Currently, only applicable when using etcd as the metadata store. |
|
||||
| `backend_client.keep_alive_timeout` | String | `3s` | The keep alive timeout for backend client. |
|
||||
| `backend_client.keep_alive_interval` | String | `10s` | The keep alive interval for backend client. |
|
||||
| `backend_client.connect_timeout` | String | `3s` | The connect timeout for backend client. |
|
||||
| `grpc` | -- | -- | The gRPC server options. |
|
||||
| `grpc.bind_addr` | String | `127.0.0.1:3002` | The address to bind the gRPC server. |
|
||||
| `grpc.server_addr` | String | `127.0.0.1:3002` | The communication server address for the frontend and datanode to connect to metasrv.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `bind_addr`. |
|
||||
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
|
||||
| `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
|
||||
| `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
|
||||
| `grpc.http2_keep_alive_interval` | String | `10s` | The server side HTTP/2 keep-alive interval |
|
||||
| `grpc.http2_keep_alive_timeout` | String | `3s` | The server side HTTP/2 keep-alive timeout. |
|
||||
| `http` | -- | -- | The HTTP server options. |
|
||||
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
|
||||
| `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
|
||||
@@ -390,7 +384,7 @@
|
||||
| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition. |
|
||||
| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled. |
|
||||
| `event_recorder` | -- | -- | Configuration options for the event recorder. |
|
||||
| `event_recorder.ttl` | String | `30d` | TTL for the events table that will be used to store the events. |
|
||||
| `event_recorder.ttl` | String | `90d` | TTL for the events table that will be used to store the events. Default is `90d`. |
|
||||
| `logging` | -- | -- | The logging options. |
|
||||
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
|
||||
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
@@ -449,6 +443,7 @@
|
||||
| `meta_client` | -- | -- | The metasrv client options. |
|
||||
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
|
||||
| `meta_client.timeout` | String | `3s` | Operation timeout. |
|
||||
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
|
||||
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
|
||||
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
|
||||
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
|
||||
@@ -468,8 +463,6 @@
|
||||
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. |
|
||||
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.connect_timeout` | String | `3s` | The connect timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.timeout` | String | `3s` | The timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.max_batch_bytes` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
|
||||
@@ -600,6 +593,7 @@
|
||||
| `flow.batching_mode.experimental_frontend_activity_timeout` | String | `60s` | Frontend activity timeout<br/>if frontend is down(not sending heartbeat) for more than frontend_activity_timeout,<br/>it will be removed from the list that flownode use to connect |
|
||||
| `flow.batching_mode.experimental_max_filter_num_per_query` | Integer | `20` | Maximum number of filters allowed in a single query |
|
||||
| `flow.batching_mode.experimental_time_window_merge_threshold` | Integer | `3` | Time window merge distance |
|
||||
| `flow.batching_mode.read_preference` | String | `Leader` | Read preference of the Frontend client. |
|
||||
| `grpc` | -- | -- | The gRPC server options. |
|
||||
| `grpc.bind_addr` | String | `127.0.0.1:6800` | The address to bind the gRPC server. |
|
||||
| `grpc.server_addr` | String | `127.0.0.1:6800` | The address advertised to the metasrv,<br/>and used for connections from outside the host |
|
||||
@@ -613,6 +607,7 @@
|
||||
| `meta_client` | -- | -- | The metasrv client options. |
|
||||
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
|
||||
| `meta_client.timeout` | String | `3s` | Operation timeout. |
|
||||
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
|
||||
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
|
||||
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
|
||||
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
|
||||
|
||||
@@ -92,6 +92,9 @@ metasrv_addrs = ["127.0.0.1:3002"]
|
||||
## Operation timeout.
|
||||
timeout = "3s"
|
||||
|
||||
## Heartbeat timeout.
|
||||
heartbeat_timeout = "500ms"
|
||||
|
||||
## DDL timeout.
|
||||
ddl_timeout = "10s"
|
||||
|
||||
@@ -161,14 +164,6 @@ recovery_parallelism = 2
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
## The connect timeout for kafka client.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
#+ connect_timeout = "3s"
|
||||
|
||||
## The timeout for kafka client.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
#+ timeout = "3s"
|
||||
|
||||
## The max size of a single producer batch.
|
||||
## Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
|
||||
@@ -30,6 +30,8 @@ node_id = 14
|
||||
#+experimental_max_filter_num_per_query=20
|
||||
## Time window merge distance
|
||||
#+experimental_time_window_merge_threshold=3
|
||||
## Read preference of the Frontend client.
|
||||
#+read_preference="Leader"
|
||||
|
||||
## The gRPC server options.
|
||||
[grpc]
|
||||
@@ -64,6 +66,9 @@ metasrv_addrs = ["127.0.0.1:3002"]
|
||||
## Operation timeout.
|
||||
timeout = "3s"
|
||||
|
||||
## Heartbeat timeout.
|
||||
heartbeat_timeout = "500ms"
|
||||
|
||||
## DDL timeout.
|
||||
ddl_timeout = "10s"
|
||||
|
||||
|
||||
@@ -171,6 +171,9 @@ metasrv_addrs = ["127.0.0.1:3002"]
|
||||
## Operation timeout.
|
||||
timeout = "3s"
|
||||
|
||||
## Heartbeat timeout.
|
||||
heartbeat_timeout = "500ms"
|
||||
|
||||
## DDL timeout.
|
||||
ddl_timeout = "10s"
|
||||
|
||||
@@ -254,8 +257,8 @@ threshold = "30s"
|
||||
## The sampling ratio of slow query log. The value should be in the range of (0, 1]. For example, `0.1` means 10% of the slow queries will be logged and `1.0` means all slow queries will be logged.
|
||||
sample_ratio = 1.0
|
||||
|
||||
## The TTL of the `slow_queries` system table. Default is `30d` when `record_type` is `system_table`.
|
||||
ttl = "30d"
|
||||
## The TTL of the `slow_queries` system table. Default is `90d` when `record_type` is `system_table`.
|
||||
ttl = "90d"
|
||||
|
||||
## The frontend can export its metrics and send to Prometheus compatible service (e.g. `greptimedb` itself) from remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
@@ -285,3 +288,8 @@ headers = { }
|
||||
## is set to "prof:true,prof_active:false". The official image adds this env variable.
|
||||
## Default is true.
|
||||
enable_heap_profiling = true
|
||||
|
||||
## Configuration options for the event recorder.
|
||||
[event_recorder]
|
||||
## TTL for the events table that will be used to store the events. Default is `90d`.
|
||||
ttl = "90d"
|
||||
|
||||
@@ -55,13 +55,6 @@ allow_region_failover_on_local_wal = false
|
||||
## Max allowed idle time before removing node info from metasrv memory.
|
||||
node_max_idle_time = "24hours"
|
||||
|
||||
## Base heartbeat interval for calculating distributed time constants.
|
||||
## The frontend heartbeat interval is 6 times of the base heartbeat interval.
|
||||
## The flownode/datanode heartbeat interval is 1 times of the base heartbeat interval.
|
||||
## e.g., If the base heartbeat interval is 3s, the frontend heartbeat interval is 18s, the flownode/datanode heartbeat interval is 3s.
|
||||
## If you change this value, you need to change the heartbeat interval of the flownode/frontend/datanode accordingly.
|
||||
#+ heartbeat_interval = "3s"
|
||||
|
||||
## Whether to enable greptimedb telemetry. Enabled by default.
|
||||
#+ enable_telemetry = true
|
||||
|
||||
@@ -100,16 +93,6 @@ ca_cert_path = ""
|
||||
## Watch for certificate file changes and auto reload
|
||||
watch = false
|
||||
|
||||
## The backend client options.
|
||||
## Currently, only applicable when using etcd as the metadata store.
|
||||
#+ [backend_client]
|
||||
## The keep alive timeout for backend client.
|
||||
#+ keep_alive_timeout = "3s"
|
||||
## The keep alive interval for backend client.
|
||||
#+ keep_alive_interval = "10s"
|
||||
## The connect timeout for backend client.
|
||||
#+ connect_timeout = "3s"
|
||||
|
||||
## The gRPC server options.
|
||||
[grpc]
|
||||
## The address to bind the gRPC server.
|
||||
@@ -124,10 +107,6 @@ runtime_size = 8
|
||||
max_recv_message_size = "512MB"
|
||||
## The maximum send message size for gRPC server.
|
||||
max_send_message_size = "512MB"
|
||||
## The server side HTTP/2 keep-alive interval
|
||||
#+ http2_keep_alive_interval = "10s"
|
||||
## The server side HTTP/2 keep-alive timeout.
|
||||
#+ http2_keep_alive_timeout = "3s"
|
||||
|
||||
## The HTTP server options.
|
||||
[http]
|
||||
@@ -263,8 +242,8 @@ create_topic_timeout = "30s"
|
||||
|
||||
## Configuration options for the event recorder.
|
||||
[event_recorder]
|
||||
## TTL for the events table that will be used to store the events.
|
||||
ttl = "30d"
|
||||
## TTL for the events table that will be used to store the events. Default is `90d`.
|
||||
ttl = "90d"
|
||||
|
||||
## The logging options.
|
||||
[logging]
|
||||
|
||||
@@ -209,14 +209,6 @@ recovery_parallelism = 2
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
## The connect timeout for kafka client.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
#+ connect_timeout = "3s"
|
||||
|
||||
## The timeout for kafka client.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
#+ timeout = "3s"
|
||||
|
||||
## Automatically create topics for WAL.
|
||||
## Set to `true` to automatically create topics for WAL.
|
||||
## Otherwise, use topics named `topic_name_prefix_[0..num_topics)`
|
||||
|
||||
157 docs/rfcs/2025-07-23-global-gc-worker.md (new file)
@@ -0,0 +1,157 @@
---
Feature Name: "global-gc-worker"
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/6571
Date: 2025-07-23
Author: "discord9 <discord9@163.com>"
---

# Global GC Worker

## Summary

This RFC proposes the integration of a garbage collection (GC) mechanism within the Compaction process. This mechanism aims to manage and remove stale files that are no longer actively used by any system component, thereby reclaiming storage space.

## Motivation

With the introduction of features such as table repartitioning, a substantial number of Parquet files can become obsolete. Furthermore, failures during manifest updates may result in orphaned files that are never referenced by the system. Therefore, a periodic garbage collection mechanism is essential to reclaim storage space by systematically removing these unused files.

## Details

### Overview

The garbage collection process will be integrated directly into the Compaction process. Upon the completion of a Compaction for a given region, the GC worker will be automatically triggered. Its primary function will be to identify and subsequently delete obsolete files that have persisted beyond their designated retention period. This integration ensures that garbage collection is performed in close conjunction with data lifecycle management, effectively leveraging the compaction process's inherent knowledge of file states.

This design prioritizes correctness and safety by explicitly linking GC execution to a well-defined operational boundary: the successful completion of a compaction cycle.

### Terminology

- **Unused File**: Refers to a file present in the storage directory that has never been formally recorded in any manifest. A common scenario for this includes cases where a new SST file is successfully written to storage, but the subsequent update to the manifest fails, leaving the file unreferenced.
- **Obsolete File**: Denotes a file that was previously recorded in a manifest but has since been explicitly marked for removal. This typically occurs following operations such as data repartitioning or compaction.
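For illustration only, the two categories could be modeled as a small state type that records the timestamp the later lingering-time check is measured against. `FileClass` and its fields are hypothetical names for this sketch, not types from the GreptimeDB codebase.

```rust
use std::time::{Duration, Instant};

/// How a file relates to the region manifest (hypothetical model).
enum FileClass {
    /// Present in storage but never recorded in any manifest,
    /// e.g. the SST was written but the manifest update failed.
    Unused { discovered_at: Instant },
    /// Previously recorded in a manifest and later marked removed,
    /// e.g. after repartitioning or compaction.
    Obsolete { removed_from_manifest_at: Instant },
}

impl FileClass {
    /// Time the file has spent lingering in its current state.
    fn lingering_time(&self, now: Instant) -> Duration {
        match self {
            FileClass::Unused { discovered_at } => now.duration_since(*discovered_at),
            FileClass::Obsolete { removed_from_manifest_at } => {
                now.duration_since(*removed_from_manifest_at)
            }
        }
    }
}
```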
### GC Worker Process

The GC worker operates as an integral part of the Compaction process. Once a Compaction for a specific region is completed, the GC worker is automatically triggered. Executing this process on a `datanode` is preferred, as it eliminates the overhead of having to set object storage configurations in the `metasrv`.

The detailed process is as follows (a minimal code sketch of this decision logic follows the flowchart below):

1. **Invocation**: Upon the successful completion of a Compaction for a region, the GC worker is invoked.
2. **Manifest Reading**: The worker reads the region's primary manifest to obtain a comprehensive list of all files marked as obsolete. Concurrently, it reads any temporary manifests generated by long-running queries to identify files that are currently in active use, thereby preventing their premature deletion.
3. **Lingering Time Check (Obsolete Files)**: For each identified obsolete file, the GC worker evaluates its "lingering time", i.e., the time that has passed since the file was removed from the manifest.
4. **Deletion Marking (Obsolete Files)**: Files that have exceeded their maximum configurable lingering time and are not referenced by any active temporary manifests are marked for deletion.
5. **Lingering Time (Unused Files)**: Unused files (those never recorded in any manifest) are also subject to a configurable maximum lingering time before they are eligible for deletion.

The following flowchart illustrates the GC worker's process:

```mermaid
flowchart TD
    A[Compaction Completed] --> B[Trigger GC Worker]
    B --> C[Scan Region Manifest]
    C --> D[Identify File Types]
    D --> E[Unused Files<br/>Never recorded in manifest]
    D --> F[Obsolete Files<br/>Previously in manifest<br/>but marked for removal]
    E --> G[Check Lingering Time]
    F --> G
    G --> H{File exceeds<br/>configured lingering time?}
    H -->|No| I[Skip deletion]
    H -->|Yes| J[Check Temporary Manifest]
    J --> K{File in use by<br/>active queries?}
    K -->|Yes| L[Retain file<br/>Wait for next GC cycle]
    K -->|No| M[Safely delete file]
    I --> N[End GC cycle]
    L --> N
    M --> O[Update Manifest]
    O --> N
    N --> P[Wait for next Compaction]
    P --> A

    style A fill:#e1f5fe
    style B fill:#f3e5f5
    style M fill:#e8f5e8
    style L fill:#fff3e0
```

#### Handling Obsolete Files

An obsolete file is permanently deleted only if two conditions are met:

1. The time elapsed since its removal from the manifest (its obsolescence timestamp) exceeds a configurable threshold.
2. It is not currently referenced by any active temporary manifests.
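
To make these two conditions concrete, here is a minimal sketch of the decision in Rust. The function name and signature are illustrative only and do not exist in the codebase; the caller is assumed to have already resolved whether any non-stale temporary manifest still references the file.

```rust
use std::time::{Duration, SystemTime};

/// Decide whether an obsolete file may be deleted.
/// `removed_at` is the time the file was dropped from the manifest.
fn can_delete_obsolete_file(
    removed_at: SystemTime,
    lingering_time: Duration,
    referenced_by_active_temp_manifest: bool,
) -> bool {
    // Condition 1: the file has lingered longer than the configured threshold.
    let lingered_long_enough = removed_at
        .elapsed()
        .map(|elapsed| elapsed > lingering_time)
        .unwrap_or(false);
    // Condition 2: no active temporary manifest still references it.
    lingered_long_enough && !referenced_by_active_temp_manifest
}
```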

#### Handling Unused Files

With the integration of the GC worker into the Compaction process, the risk of accidentally deleting newly created SST files that have not yet been recorded in the manifest is significantly mitigated. Consequently, the concept of "Unused Files" as a distinct category primarily susceptible to accidental deletion is largely resolved. Any files that are genuinely "unused" (i.e., never referenced by any manifest, including temporary ones) can be safely deleted after a configurable maximum lingering time.

For debugging and auditing purposes, a comprehensive list of recently deleted files can be maintained.

### Ensuring Read Consistency

To prevent the GC worker from inadvertently deleting files that are actively being utilized by long-running analytical queries, a robust protection mechanism is introduced. This mechanism relies on temporary manifests that are actively kept "alive" by the queries using them.

When a long-running query is detected (e.g., by a slow query recorder), it will write a temporary manifest to the region's manifest directory. This manifest lists all files required for the query. However, simply creating this file is not enough, as a query runner might crash, leaving the temporary manifest orphaned and preventing garbage collection indefinitely.

To address this, the following "heartbeat" mechanism is implemented:

1. **Periodic Updates**: The process executing the long-running query is responsible for periodically updating the modification timestamp of its temporary manifest file (i.e., "touching" the file). This serves as a heartbeat, signaling that the query is still active.
2. **GC Worker Verification**: When the GC worker runs, it scans for temporary manifests. For each one it finds, it checks the file's last modification time.
3. **Stale File Handling**: If a temporary manifest's last modification time is older than a configurable threshold, the GC worker considers it stale (left over from a crashed or terminated query). The GC worker will then delete this stale temporary manifest. Files that were protected only by this stale manifest are no longer shielded from garbage collection.
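
As a rough illustration of steps 2 and 3, the staleness check boils down to comparing the manifest's last modification time against a configurable heartbeat timeout. The helper below is only a sketch: it assumes the temporary manifest is visible through a filesystem-like API (for object storage, the object's last-modified metadata would play the same role), and none of these names exist in the codebase.

```rust
use std::fs;
use std::path::Path;
use std::time::Duration;

/// Returns true if the temporary manifest's owning query has stopped
/// heartbeating, i.e. the file has not been "touched" within the timeout.
fn is_temp_manifest_stale(path: &Path, heartbeat_timeout: Duration) -> std::io::Result<bool> {
    let modified = fs::metadata(path)?.modified()?;
    // A query that is still running keeps refreshing this timestamp; an old
    // timestamp means the manifest is left over from a crashed or finished query.
    Ok(modified
        .elapsed()
        .map(|age| age > heartbeat_timeout)
        .unwrap_or(false))
}
```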

This approach ensures that only files for genuinely active queries are protected. The lifecycle of the temporary manifest is managed dynamically: it is created when a long query starts, kept alive through periodic updates, and is either deleted by the query upon normal completion or automatically cleaned up by the GC worker if the query terminates unexpectedly.

This mechanism may be too complex to implement at once. We can consider a two-phased approach:

1. **Phase 1 (Simple Time-Based Deletion)**: Initially, implement a simpler GC strategy that deletes obsolete files based solely on a configurable lingering time. This provides a baseline for space reclamation without the complexity of temporary manifests.
2. **Phase 2 (Consistency-Aware GC)**: Based on the practical effectiveness and observed issues from Phase 1, we can then decide whether to implement the full temporary manifest and heartbeat mechanism to handle long-running queries. This iterative approach allows for a quicker initial implementation while gathering real-world data to justify the need for a more complex solution.

## Drawbacks

- **Dependency on Compaction Frequency**: The integration of the GC worker with Compaction means that GC cycles are directly tied to the frequency of compactions. In environments with infrequent compaction operations, obsolete files may accumulate for extended periods before being reclaimed, potentially leading to increased storage consumption.
- **Race Condition with Long-Running Queries**: A potential race condition exists if a long-running query starts but has not yet written its temporary manifest, while a compaction process simultaneously begins and marks files used by that query as obsolete. This scenario could lead to the premature deletion of files still required by the active query. To mitigate this, the threshold time for writing a temporary manifest should be significantly shorter than the lingering time configured for obsolete files, ensuring that subsequent GC runs do not delete files that are referenced by a temporary manifest while the query is still running.
  Also, a read replica's manifest version should not lag behind by more than the lingering time of obsolete files; otherwise it might reference files that the GC worker has already deleted.
- **Temporary Manifest Upload**: The temporary manifest needs to be uploaded to object storage, which may introduce additional complexity and potential performance overhead. Since long-running queries are typically not frequent, the performance impact is expected to be minimal.

## Conclusion and Rationale

This section summarizes the key aspects and trade-offs of the proposed integrated GC worker, highlighting its advantages and potential challenges.

| Aspect | Current Proposal (Integrated GC) |
| :--- | :--- |
| **Implementation Complexity** | **Medium**. Requires careful integration with the compaction process and the slow query recorder for temporary manifest management. |
| **Reliability** | **High**. Integration with compaction and leveraging temporary manifests from long-running queries significantly mitigates the risk of incorrect deletion. Accurate management of lingering times for obsolete files and prevention of accidental deletion of newly created SSTs enhance data safety. |
| **Performance Overhead** | **Low to Medium**. The GC worker runs post-compaction, minimizing direct impact on write paths. Overhead from temporary manifest management by the slow query recorder is expected to be acceptable for long-running queries. |
| **Impact on Other Components** | **Moderate**. Requires modifications to the compaction process to trigger GC and the slow query recorder to manage temporary manifests. This introduces some coupling but enhances overall data safety. |
| **Deletion Strategy** | **State- and Time-Based**. Obsolete files are deleted based on a configurable lingering time, which is paused if the file is referenced by a temporary manifest. Unused files (never in a manifest) are also subject to a lingering time. |

## Unresolved Questions and Future Work

This section outlines key areas requiring further discussion and defines potential avenues for future development.

* **Slow Query Recorder Implementation**: Detailed specifications for modifying the slow query recorder's implementation and its precise interaction mechanisms with temporary manifests are needed.
* **Configurable Lingering Times**: Establish and make configurable the specific lingering times for both obsolete and unused files to optimize storage reclamation and data availability (a possible shape for these options is sketched below).
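
The configuration could follow the pattern already used elsewhere in the codebase, where durations are (de)serialized via `humantime_serde`. The struct below is only a sketch; the type name, field names, and defaults are invented for illustration and are not part of this RFC or the existing code.

```rust
use std::time::Duration;

use serde::{Deserialize, Serialize};

/// Hypothetical options for the GC worker.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct GcWorkerOptions {
    /// How long an obsolete file (already removed from the manifest) is
    /// retained before it becomes eligible for deletion.
    #[serde(with = "humantime_serde")]
    pub obsolete_file_lingering_time: Duration,
    /// How long an unused file (never recorded in any manifest) is retained.
    #[serde(with = "humantime_serde")]
    pub unused_file_lingering_time: Duration,
    /// A temporary manifest whose last "touch" is older than this is stale.
    #[serde(with = "humantime_serde")]
    pub temp_manifest_heartbeat_timeout: Duration,
}

impl Default for GcWorkerOptions {
    fn default() -> Self {
        Self {
            obsolete_file_lingering_time: Duration::from_secs(60 * 60),
            unused_file_lingering_time: Duration::from_secs(24 * 60 * 60),
            temp_manifest_heartbeat_timeout: Duration::from_secs(10 * 60),
        }
    }
}
```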

## Alternatives

### 1. Standalone GC Service

Instead of integrating the GC worker directly into the Compaction process, a standalone GC service could be implemented. This service would operate independently, periodically scanning the storage for obsolete and unused files based on manifest information and predefined retention policies.

**Pros:**

* **Decoupling**: Separates GC logic from compaction, allowing independent scaling and deployment.
* **Flexibility**: Can be configured to run at different frequencies and with different strategies than compaction.

**Cons:**

* **Increased Complexity**: Requires a separate service to manage, monitor, and coordinate with other components.
* **Potential for Redundancy**: May duplicate some file scanning logic already present in compaction.
* **Consistency Challenges**: Ensuring read consistency would require more complex coordination mechanisms between the standalone GC service and active queries, potentially involving a distributed lock manager or a more sophisticated temporary manifest system.

This alternative could be implemented in the future if the integrated GC worker proves insufficient or if there is a need for more advanced GC strategies.

### 2. Manifest-Driven Deletion (No Lingering Time)

This alternative would involve immediate deletion of files once they are removed from the manifest, without a lingering time.

**Pros:**

* **Simplicity**: Simplifies the GC logic by removing the need for lingering time management.
* **Immediate Space Reclamation**: Storage space is reclaimed as soon as files are marked for deletion.

**Cons:**

* **Increased Risk of Data Loss**: Higher risk of deleting files still in use by long-running queries or other processes if not perfectly synchronized.
* **Complex Read Consistency**: Requires extremely robust and immediate mechanisms to ensure that no active queries are referencing files marked for deletion, potentially leading to performance bottlenecks or complex error handling.
* **Debugging Challenges**: Difficult to debug issues related to premature file deletion due to the immediate nature of the operation.

@@ -14,6 +14,8 @@
|
||||
|
||||
pub mod column_def;
|
||||
|
||||
pub mod helper;
|
||||
|
||||
pub mod meta {
|
||||
pub use greptime_proto::v1::meta::*;
|
||||
}
|
||||
|
||||
65
src/api/src/v1/helper.rs
Normal file
@@ -0,0 +1,65 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use greptime_proto::v1::value::ValueData;
|
||||
use greptime_proto::v1::{ColumnDataType, ColumnSchema, Row, SemanticType, Value};
|
||||
|
||||
/// Create a time index [ColumnSchema] with column's name and datatype.
|
||||
/// Other fields are left default.
|
||||
/// Useful when you just want to create a simple [ColumnSchema] without providing much struct fields.
|
||||
pub fn time_index_column_schema(name: &str, datatype: ColumnDataType) -> ColumnSchema {
|
||||
ColumnSchema {
|
||||
column_name: name.to_string(),
|
||||
datatype: datatype as i32,
|
||||
semantic_type: SemanticType::Timestamp as i32,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a tag [ColumnSchema] with column's name and datatype.
|
||||
/// Other fields are left default.
|
||||
/// Useful when you just want to create a simple [ColumnSchema] without providing much struct fields.
|
||||
pub fn tag_column_schema(name: &str, datatype: ColumnDataType) -> ColumnSchema {
|
||||
ColumnSchema {
|
||||
column_name: name.to_string(),
|
||||
datatype: datatype as i32,
|
||||
semantic_type: SemanticType::Tag as i32,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a field [ColumnSchema] with column's name and datatype.
|
||||
/// Other fields are left default.
|
||||
/// Useful when you just want to create a simple [ColumnSchema] without providing much struct fields.
|
||||
pub fn field_column_schema(name: &str, datatype: ColumnDataType) -> ColumnSchema {
|
||||
ColumnSchema {
|
||||
column_name: name.to_string(),
|
||||
datatype: datatype as i32,
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a [Row] from [ValueData]s.
|
||||
/// Useful when you don't want to write much verbose codes.
|
||||
pub fn row(values: Vec<ValueData>) -> Row {
|
||||
Row {
|
||||
values: values
|
||||
.into_iter()
|
||||
.map(|x| Value {
|
||||
value_data: Some(x),
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
}
|
||||
}
|
||||
@@ -21,6 +21,7 @@ bytes.workspace = true
|
||||
common-base.workspace = true
|
||||
common-catalog.workspace = true
|
||||
common-error.workspace = true
|
||||
common-event-recorder.workspace = true
|
||||
common-frontend.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
|
||||
@@ -29,7 +29,6 @@ use crate::information_schema::{InformationExtensionRef, InformationSchemaProvid
|
||||
use crate::kvbackend::manager::{SystemCatalog, CATALOG_CACHE_MAX_CAPACITY};
|
||||
use crate::kvbackend::KvBackendCatalogManager;
|
||||
use crate::process_manager::ProcessManagerRef;
|
||||
use crate::system_schema::numbers_table_provider::NumbersTableProvider;
|
||||
use crate::system_schema::pg_catalog::PGCatalogProvider;
|
||||
|
||||
pub struct KvBackendCatalogManagerBuilder {
|
||||
@@ -120,7 +119,6 @@ impl KvBackendCatalogManagerBuilder {
|
||||
DEFAULT_CATALOG_NAME.to_string(),
|
||||
me.clone(),
|
||||
)),
|
||||
numbers_table_provider: NumbersTableProvider,
|
||||
backend,
|
||||
process_manager,
|
||||
#[cfg(feature = "enterprise")]
|
||||
|
||||
@@ -18,7 +18,8 @@ use std::sync::{Arc, Weak};
|
||||
|
||||
use async_stream::try_stream;
|
||||
use common_catalog::consts::{
|
||||
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME,
|
||||
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID,
|
||||
PG_CATALOG_NAME,
|
||||
};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cache::{
|
||||
@@ -42,6 +43,8 @@ use snafu::prelude::*;
|
||||
use store_api::metric_engine_consts::METRIC_ENGINE_NAME;
|
||||
use table::dist_table::DistTable;
|
||||
use table::metadata::{TableId, TableInfoRef};
|
||||
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
|
||||
use table::table::PartitionRules;
|
||||
use table::table_name::TableName;
|
||||
use table::TableRef;
|
||||
use tokio::sync::Semaphore;
|
||||
@@ -56,7 +59,6 @@ use crate::information_schema::InformationSchemaTableFactoryRef;
|
||||
use crate::information_schema::{InformationExtensionRef, InformationSchemaProvider};
|
||||
use crate::kvbackend::TableCacheRef;
|
||||
use crate::process_manager::ProcessManagerRef;
|
||||
use crate::system_schema::numbers_table_provider::NumbersTableProvider;
|
||||
use crate::system_schema::pg_catalog::PGCatalogProvider;
|
||||
use crate::system_schema::SystemSchemaProvider;
|
||||
use crate::CatalogManager;
|
||||
@@ -131,6 +133,8 @@ impl KvBackendCatalogManager {
|
||||
{
|
||||
let mut new_table_info = (*table.table_info()).clone();
|
||||
|
||||
let mut phy_part_cols_not_in_logical_table = vec![];
|
||||
|
||||
// Remap partition key indices from physical table to logical table
|
||||
new_table_info.meta.partition_key_indices = physical_table_info_value
|
||||
.table_info
|
||||
@@ -147,15 +151,30 @@ impl KvBackendCatalogManager {
|
||||
.get(physical_index)
|
||||
.and_then(|physical_column| {
|
||||
// Find the corresponding index in the logical table schema
|
||||
new_table_info
|
||||
let idx = new_table_info
|
||||
.meta
|
||||
.schema
|
||||
.column_index_by_name(physical_column.name.as_str())
|
||||
.column_index_by_name(physical_column.name.as_str());
|
||||
if idx.is_none() {
|
||||
// not all part columns in physical table that are also in logical table
|
||||
phy_part_cols_not_in_logical_table
|
||||
.push(physical_column.name.clone());
|
||||
}
|
||||
|
||||
idx
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
let new_table = DistTable::table(Arc::new(new_table_info));
|
||||
let partition_rules = if !phy_part_cols_not_in_logical_table.is_empty() {
|
||||
Some(PartitionRules {
|
||||
extra_phy_cols_not_in_logical_table: phy_part_cols_not_in_logical_table,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let new_table = DistTable::table_partitioned(Arc::new(new_table_info), partition_rules);
|
||||
|
||||
return Ok(new_table);
|
||||
}
|
||||
@@ -536,7 +555,6 @@ pub(super) struct SystemCatalog {
|
||||
// system_schema_provider for default catalog
|
||||
pub(super) information_schema_provider: Arc<InformationSchemaProvider>,
|
||||
pub(super) pg_catalog_provider: Arc<PGCatalogProvider>,
|
||||
pub(super) numbers_table_provider: NumbersTableProvider,
|
||||
pub(super) backend: KvBackendRef,
|
||||
pub(super) process_manager: Option<ProcessManagerRef>,
|
||||
#[cfg(feature = "enterprise")]
|
||||
@@ -566,7 +584,9 @@ impl SystemCatalog {
|
||||
PG_CATALOG_NAME if channel == Channel::Postgres => {
|
||||
self.pg_catalog_provider.table_names()
|
||||
}
|
||||
DEFAULT_SCHEMA_NAME => self.numbers_table_provider.table_names(),
|
||||
DEFAULT_SCHEMA_NAME => {
|
||||
vec![NUMBERS_TABLE_NAME.to_string()]
|
||||
}
|
||||
_ => vec![],
|
||||
}
|
||||
}
|
||||
@@ -584,7 +604,7 @@ impl SystemCatalog {
|
||||
if schema == INFORMATION_SCHEMA_NAME {
|
||||
self.information_schema_provider.table(table).is_some()
|
||||
} else if schema == DEFAULT_SCHEMA_NAME {
|
||||
self.numbers_table_provider.table_exists(table)
|
||||
table == NUMBERS_TABLE_NAME
|
||||
} else if schema == PG_CATALOG_NAME && channel == Channel::Postgres {
|
||||
self.pg_catalog_provider.table(table).is_some()
|
||||
} else {
|
||||
@@ -629,8 +649,8 @@ impl SystemCatalog {
|
||||
});
|
||||
pg_catalog_provider.table(table_name)
|
||||
}
|
||||
} else if schema == DEFAULT_SCHEMA_NAME {
|
||||
self.numbers_table_provider.table(table_name)
|
||||
} else if schema == DEFAULT_SCHEMA_NAME && table_name == NUMBERS_TABLE_NAME {
|
||||
Some(NumbersTable::table(NUMBERS_TABLE_ID))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
||||
@@ -38,7 +38,7 @@ use crate::{CatalogManager, DeregisterTableRequest, RegisterSchemaRequest, Regis
|
||||
|
||||
type SchemaEntries = HashMap<String, HashMap<String, TableRef>>;
|
||||
|
||||
/// Simple in-memory list of catalogs
|
||||
/// Simple in-memory list of catalogs used for tests.
|
||||
#[derive(Clone)]
|
||||
pub struct MemoryCatalogManager {
|
||||
/// Collection of catalogs containing schemas and ultimately Tables
|
||||
|
||||
@@ -21,17 +21,17 @@ use std::time::{Duration, Instant, UNIX_EPOCH};
|
||||
|
||||
use api::v1::frontend::{KillProcessRequest, ListProcessRequest, ProcessInfo};
|
||||
use common_base::cancellation::CancellationHandle;
|
||||
use common_event_recorder::EventRecorderRef;
|
||||
use common_frontend::selector::{FrontendSelector, MetaClientSelector};
|
||||
use common_frontend::slow_query_event::SlowQueryEvent;
|
||||
use common_telemetry::{debug, error, info, warn};
|
||||
use common_telemetry::logging::SlowQueriesRecordType;
|
||||
use common_telemetry::{debug, info, slow, warn};
|
||||
use common_time::util::current_time_millis;
|
||||
use meta_client::MetaClientRef;
|
||||
use promql_parser::parser::EvalStmt;
|
||||
use rand::random;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use sql::statements::statement::Statement;
|
||||
use tokio::sync::mpsc::Sender;
|
||||
|
||||
use crate::error;
|
||||
use crate::metrics::{PROCESS_KILL_COUNT, PROCESS_LIST_COUNT};
|
||||
@@ -249,6 +249,8 @@ pub struct Ticket {
|
||||
pub(crate) manager: ProcessManagerRef,
|
||||
pub(crate) id: ProcessId,
|
||||
pub cancellation_handle: Arc<CancellationHandle>,
|
||||
|
||||
// Keep the handle of the slow query timer to ensure it will trigger the event recording when dropped.
|
||||
_slow_query_timer: Option<SlowQueryTimer>,
|
||||
}
|
||||
|
||||
@@ -295,38 +297,37 @@ impl Debug for CancellableProcess {
|
||||
pub struct SlowQueryTimer {
|
||||
start: Instant,
|
||||
stmt: QueryStatement,
|
||||
query_ctx: QueryContextRef,
|
||||
threshold: Option<Duration>,
|
||||
sample_ratio: Option<f64>,
|
||||
tx: Sender<SlowQueryEvent>,
|
||||
threshold: Duration,
|
||||
sample_ratio: f64,
|
||||
record_type: SlowQueriesRecordType,
|
||||
recorder: EventRecorderRef,
|
||||
}
|
||||
|
||||
impl SlowQueryTimer {
|
||||
pub fn new(
|
||||
stmt: QueryStatement,
|
||||
query_ctx: QueryContextRef,
|
||||
threshold: Option<Duration>,
|
||||
sample_ratio: Option<f64>,
|
||||
tx: Sender<SlowQueryEvent>,
|
||||
threshold: Duration,
|
||||
sample_ratio: f64,
|
||||
record_type: SlowQueriesRecordType,
|
||||
recorder: EventRecorderRef,
|
||||
) -> Self {
|
||||
Self {
|
||||
start: Instant::now(),
|
||||
stmt,
|
||||
query_ctx,
|
||||
threshold,
|
||||
sample_ratio,
|
||||
tx,
|
||||
record_type,
|
||||
recorder,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SlowQueryTimer {
|
||||
fn send_slow_query_event(&self, elapsed: Duration, threshold: Duration) {
|
||||
fn send_slow_query_event(&self, elapsed: Duration) {
|
||||
let mut slow_query_event = SlowQueryEvent {
|
||||
cost: elapsed.as_millis() as u64,
|
||||
threshold: threshold.as_millis() as u64,
|
||||
threshold: self.threshold.as_millis() as u64,
|
||||
query: "".to_string(),
|
||||
query_ctx: self.query_ctx.clone(),
|
||||
|
||||
// The following fields are only used for PromQL queries.
|
||||
is_promql: false,
|
||||
@@ -363,29 +364,37 @@ impl SlowQueryTimer {
|
||||
}
|
||||
}
|
||||
|
||||
// Send SlowQueryEvent to the handler.
|
||||
if let Err(e) = self.tx.try_send(slow_query_event) {
|
||||
error!(e; "Failed to send slow query event");
|
||||
match self.record_type {
|
||||
// Send the slow query event to the event recorder to persist it as the system table.
|
||||
SlowQueriesRecordType::SystemTable => {
|
||||
self.recorder.record(Box::new(slow_query_event));
|
||||
}
|
||||
// Record the slow query in a specific logs file.
|
||||
SlowQueriesRecordType::Log => {
|
||||
slow!(
|
||||
cost = slow_query_event.cost,
|
||||
threshold = slow_query_event.threshold,
|
||||
query = slow_query_event.query,
|
||||
is_promql = slow_query_event.is_promql,
|
||||
promql_range = slow_query_event.promql_range,
|
||||
promql_step = slow_query_event.promql_step,
|
||||
promql_start = slow_query_event.promql_start,
|
||||
promql_end = slow_query_event.promql_end,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for SlowQueryTimer {
|
||||
fn drop(&mut self) {
|
||||
if let Some(threshold) = self.threshold {
|
||||
// Calculate the elaspsed duration since the timer is created.
|
||||
let elapsed = self.start.elapsed();
|
||||
if elapsed > threshold {
|
||||
if let Some(ratio) = self.sample_ratio {
|
||||
// Only capture a portion of slow queries based on sample_ratio.
|
||||
// Generate a random number in [0, 1) and compare it with sample_ratio.
|
||||
if ratio >= 1.0 || random::<f64>() <= ratio {
|
||||
self.send_slow_query_event(elapsed, threshold);
|
||||
}
|
||||
} else {
|
||||
// Captures all slow queries if sample_ratio is not set.
|
||||
self.send_slow_query_event(elapsed, threshold);
|
||||
}
|
||||
// Calculate the elaspsed duration since the timer is created.
|
||||
let elapsed = self.start.elapsed();
|
||||
if elapsed > self.threshold {
|
||||
// Only capture a portion of slow queries based on sample_ratio.
|
||||
// Generate a random number in [0, 1) and compare it with sample_ratio.
|
||||
if self.sample_ratio >= 1.0 || random::<f64>() <= self.sample_ratio {
|
||||
self.send_slow_query_event(elapsed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
|
||||
pub mod information_schema;
|
||||
mod memory_table;
|
||||
pub mod numbers_table_provider;
|
||||
pub mod pg_catalog;
|
||||
pub mod predicate;
|
||||
mod utils;
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#[cfg(any(test, feature = "testing", debug_assertions))]
|
||||
use common_catalog::consts::NUMBERS_TABLE_ID;
|
||||
#[cfg(any(test, feature = "testing", debug_assertions))]
|
||||
use table::table::numbers::NumbersTable;
|
||||
#[cfg(any(test, feature = "testing", debug_assertions))]
|
||||
use table::table::numbers::NUMBERS_TABLE_NAME;
|
||||
use table::TableRef;
|
||||
|
||||
// NumbersTableProvider is a dedicated provider for feature-gating the numbers table.
|
||||
#[derive(Clone)]
|
||||
pub struct NumbersTableProvider;
|
||||
|
||||
#[cfg(any(test, feature = "testing", debug_assertions))]
|
||||
impl NumbersTableProvider {
|
||||
pub(crate) fn table_exists(&self, name: &str) -> bool {
|
||||
name == NUMBERS_TABLE_NAME
|
||||
}
|
||||
|
||||
pub(crate) fn table_names(&self) -> Vec<String> {
|
||||
vec![NUMBERS_TABLE_NAME.to_string()]
|
||||
}
|
||||
|
||||
pub(crate) fn table(&self, name: &str) -> Option<TableRef> {
|
||||
if name == NUMBERS_TABLE_NAME {
|
||||
Some(NumbersTable::table(NUMBERS_TABLE_ID))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(any(test, feature = "testing", debug_assertions)))]
|
||||
impl NumbersTableProvider {
|
||||
pub(crate) fn table_exists(&self, _name: &str) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
pub(crate) fn table_names(&self) -> Vec<String> {
|
||||
vec![]
|
||||
}
|
||||
|
||||
pub(crate) fn table(&self, _name: &str) -> Option<TableRef> {
|
||||
None
|
||||
}
|
||||
}
|
||||
@@ -20,7 +20,7 @@ use common_meta::kv_backend::chroot::ChrootKvBackend;
|
||||
use common_meta::kv_backend::etcd::EtcdStore;
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use meta_srv::bootstrap::create_etcd_client;
|
||||
use meta_srv::metasrv::{BackendClientOptions, BackendImpl};
|
||||
use meta_srv::metasrv::BackendImpl;
|
||||
|
||||
use crate::error::{EmptyStoreAddrsSnafu, UnsupportedMemoryBackendSnafu};
|
||||
|
||||
@@ -67,10 +67,9 @@ impl StoreConfig {
|
||||
} else {
|
||||
let kvbackend = match self.backend {
|
||||
BackendImpl::EtcdStore => {
|
||||
let etcd_client =
|
||||
create_etcd_client(store_addrs, &BackendClientOptions::default())
|
||||
.await
|
||||
.map_err(BoxedError::new)?;
|
||||
let etcd_client = create_etcd_client(store_addrs)
|
||||
.await
|
||||
.map_err(BoxedError::new)?;
|
||||
Ok(EtcdStore::with_etcd_client(etcd_client, max_txn_ops))
|
||||
}
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
|
||||
@@ -29,7 +29,7 @@ use futures::TryStreamExt;
|
||||
|
||||
use crate::error::InvalidArgumentsSnafu;
|
||||
use crate::metadata::common::StoreConfig;
|
||||
use crate::metadata::control::utils::{decode_key_value, get_table_id_by_name, json_formatter};
|
||||
use crate::metadata::control::utils::{decode_key_value, get_table_id_by_name, json_fromatter};
|
||||
use crate::Tool;
|
||||
|
||||
/// Getting metadata from metadata store.
|
||||
@@ -206,7 +206,7 @@ impl Tool for GetTableTool {
|
||||
println!(
|
||||
"{}\n{}",
|
||||
TableInfoKey::new(table_id),
|
||||
json_formatter(self.pretty, &*table_info)
|
||||
json_fromatter(self.pretty, &*table_info)
|
||||
);
|
||||
} else {
|
||||
println!("Table info not found");
|
||||
@@ -221,7 +221,7 @@ impl Tool for GetTableTool {
|
||||
println!(
|
||||
"{}\n{}",
|
||||
TableRouteKey::new(table_id),
|
||||
json_formatter(self.pretty, &table_route)
|
||||
json_fromatter(self.pretty, &table_route)
|
||||
);
|
||||
} else {
|
||||
println!("Table route not found");
|
||||
|
||||
@@ -27,7 +27,7 @@ pub fn decode_key_value(kv: KeyValue) -> CommonMetaResult<(String, String)> {
|
||||
}
|
||||
|
||||
/// Formats a value as a JSON string.
|
||||
pub fn json_formatter<T>(pretty: bool, value: &T) -> String
|
||||
pub fn json_fromatter<T>(pretty: bool, value: &T) -> String
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
|
||||
@@ -279,7 +279,7 @@ impl StartCommand {
|
||||
&opts.component.logging,
|
||||
&opts.component.tracing,
|
||||
opts.component.node_id.clone(),
|
||||
opts.component.slow_query.as_ref(),
|
||||
Some(&opts.component.slow_query),
|
||||
);
|
||||
|
||||
log_versions(verbose_version(), short_version(), APP_NAME);
|
||||
|
||||
@@ -20,7 +20,6 @@ use async_trait::async_trait;
|
||||
use clap::Parser;
|
||||
use common_base::Plugins;
|
||||
use common_config::Configurable;
|
||||
use common_meta::distributed_time_constants::init_distributed_time_constants;
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::logging::{TracingOptions, DEFAULT_LOGGING_DIR};
|
||||
use common_version::{short_version, verbose_version};
|
||||
@@ -328,7 +327,6 @@ impl StartCommand {
|
||||
log_versions(verbose_version(), short_version(), APP_NAME);
|
||||
maybe_activate_heap_profile(&opts.component.memory);
|
||||
create_resource_limit_metrics(APP_NAME);
|
||||
init_distributed_time_constants(opts.component.heartbeat_interval);
|
||||
|
||||
info!("Metasrv start command: {:#?}", self);
|
||||
|
||||
|
||||
@@ -157,7 +157,7 @@ pub struct StandaloneOptions {
|
||||
pub init_regions_in_background: bool,
|
||||
pub init_regions_parallelism: usize,
|
||||
pub max_in_flight_write_bytes: Option<ReadableSize>,
|
||||
pub slow_query: Option<SlowQueryOptions>,
|
||||
pub slow_query: SlowQueryOptions,
|
||||
pub query: QueryOptions,
|
||||
pub memory: MemoryOptions,
|
||||
}
|
||||
@@ -191,7 +191,7 @@ impl Default for StandaloneOptions {
|
||||
init_regions_in_background: false,
|
||||
init_regions_parallelism: 16,
|
||||
max_in_flight_write_bytes: None,
|
||||
slow_query: Some(SlowQueryOptions::default()),
|
||||
slow_query: SlowQueryOptions::default(),
|
||||
query: QueryOptions::default(),
|
||||
memory: MemoryOptions::default(),
|
||||
}
|
||||
@@ -486,7 +486,7 @@ impl StartCommand {
|
||||
&opts.component.logging,
|
||||
&opts.component.tracing,
|
||||
None,
|
||||
opts.component.slow_query.as_ref(),
|
||||
Some(&opts.component.slow_query),
|
||||
);
|
||||
|
||||
log_versions(verbose_version(), short_version(), APP_NAME);
|
||||
|
||||
@@ -51,6 +51,7 @@ fn test_load_datanode_example_config() {
|
||||
meta_client: Some(MetaClientOptions {
|
||||
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
|
||||
timeout: Duration::from_secs(3),
|
||||
heartbeat_timeout: Duration::from_millis(500),
|
||||
ddl_timeout: Duration::from_secs(10),
|
||||
connect_timeout: Duration::from_secs(1),
|
||||
tcp_nodelay: true,
|
||||
@@ -115,6 +116,7 @@ fn test_load_frontend_example_config() {
|
||||
meta_client: Some(MetaClientOptions {
|
||||
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
|
||||
timeout: Duration::from_secs(3),
|
||||
heartbeat_timeout: Duration::from_millis(500),
|
||||
ddl_timeout: Duration::from_secs(10),
|
||||
connect_timeout: Duration::from_secs(1),
|
||||
tcp_nodelay: true,
|
||||
@@ -238,6 +240,7 @@ fn test_load_flownode_example_config() {
|
||||
meta_client: Some(MetaClientOptions {
|
||||
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
|
||||
timeout: Duration::from_secs(3),
|
||||
heartbeat_timeout: Duration::from_millis(500),
|
||||
ddl_timeout: Duration::from_secs(10),
|
||||
connect_timeout: Duration::from_secs(1),
|
||||
tcp_nodelay: true,
|
||||
|
||||
@@ -12,6 +12,9 @@ common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-time.workspace = true
|
||||
humantime.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
itertools.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
snafu.workspace = true
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(duration_constructors)]
|
||||
|
||||
pub mod error;
|
||||
pub mod recorder;
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Debug;
|
||||
use std::sync::{Arc, OnceLock};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::column_data_type_extension::TypeExt;
|
||||
@@ -28,6 +28,8 @@ use async_trait::async_trait;
|
||||
use backon::{BackoffBuilder, ExponentialBuilder};
|
||||
use common_telemetry::{debug, error, info, warn};
|
||||
use common_time::timestamp::{TimeUnit, Timestamp};
|
||||
use humantime::format_duration;
|
||||
use itertools::Itertools;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use store_api::mito_engine_options::{APPEND_MODE_KEY, TTL_KEY};
|
||||
use tokio::sync::mpsc::{channel, Receiver, Sender};
|
||||
@@ -50,12 +52,10 @@ pub const EVENTS_TABLE_TIMESTAMP_COLUMN_NAME: &str = "timestamp";
|
||||
/// EventRecorderRef is the reference to the event recorder.
|
||||
pub type EventRecorderRef = Arc<dyn EventRecorder>;
|
||||
|
||||
static EVENTS_TABLE_TTL: OnceLock<String> = OnceLock::new();
|
||||
|
||||
/// The time interval for flushing batched events to the event handler.
|
||||
pub const DEFAULT_FLUSH_INTERVAL_SECONDS: Duration = Duration::from_secs(5);
|
||||
// The default TTL for the events table.
|
||||
const DEFAULT_EVENTS_TABLE_TTL: &str = "30d";
|
||||
/// The default TTL(90 days) for the events table.
|
||||
const DEFAULT_EVENTS_TABLE_TTL: Duration = Duration::from_days(90);
|
||||
// The capacity of the tokio channel for transmitting events to background processor.
|
||||
const DEFAULT_CHANNEL_SIZE: usize = 2048;
|
||||
// The size of the buffer for batching events before flushing to event handler.
|
||||
@@ -72,6 +72,11 @@ const DEFAULT_MAX_RETRY_TIMES: u64 = 3;
|
||||
///
|
||||
/// The event can also add the extra schema and row to the event by overriding the `extra_schema` and `extra_row` methods.
|
||||
pub trait Event: Send + Sync + Debug {
|
||||
/// Returns the table name of the event.
|
||||
fn table_name(&self) -> &str {
|
||||
DEFAULT_EVENTS_TABLE_NAME
|
||||
}
|
||||
|
||||
/// Returns the type of the event.
|
||||
fn event_type(&self) -> &str;
|
||||
|
||||
@@ -107,88 +112,68 @@ pub trait Eventable: Send + Sync + Debug {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the hints for the insert operation.
|
||||
pub fn insert_hints() -> Vec<(&'static str, &'static str)> {
|
||||
vec![
|
||||
(
|
||||
TTL_KEY,
|
||||
EVENTS_TABLE_TTL
|
||||
.get()
|
||||
.map(|s| s.as_str())
|
||||
.unwrap_or(DEFAULT_EVENTS_TABLE_TTL),
|
||||
),
|
||||
(APPEND_MODE_KEY, "true"),
|
||||
]
|
||||
/// Groups events by its `event_type`.
|
||||
#[allow(clippy::borrowed_box)]
|
||||
pub fn group_events_by_type(events: &[Box<dyn Event>]) -> HashMap<&str, Vec<&Box<dyn Event>>> {
|
||||
events
|
||||
.iter()
|
||||
.into_grouping_map_by(|event| event.event_type())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Builds the row inserts request for the events that will be persisted to the events table.
|
||||
pub fn build_row_inserts_request(events: &[Box<dyn Event>]) -> Result<RowInsertRequests> {
|
||||
// Aggregate the events by the event type.
|
||||
let mut event_groups: HashMap<&str, Vec<&Box<dyn Event>>> = HashMap::new();
|
||||
/// Builds the row inserts request for the events that will be persisted to the events table. The `events` should have the same event type, or it will return an error.
|
||||
#[allow(clippy::borrowed_box)]
|
||||
pub fn build_row_inserts_request(events: &[&Box<dyn Event>]) -> Result<RowInsertRequests> {
|
||||
// Ensure all the events are the same type.
|
||||
validate_events(events)?;
|
||||
|
||||
// We already validated the events, so it's safe to get the first event to build the schema for the RowInsertRequest.
|
||||
let event = &events[0];
|
||||
let mut schema: Vec<ColumnSchema> = Vec::with_capacity(3 + event.extra_schema().len());
|
||||
schema.extend(vec![
|
||||
ColumnSchema {
|
||||
column_name: EVENTS_TABLE_TYPE_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::String.into(),
|
||||
semantic_type: SemanticType::Tag.into(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: EVENTS_TABLE_PAYLOAD_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::Binary as i32,
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
datatype_extension: Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
|
||||
}),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: EVENTS_TABLE_TIMESTAMP_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::TimestampNanosecond.into(),
|
||||
semantic_type: SemanticType::Timestamp.into(),
|
||||
..Default::default()
|
||||
},
|
||||
]);
|
||||
schema.extend(event.extra_schema());
|
||||
|
||||
let mut rows: Vec<Row> = Vec::with_capacity(events.len());
|
||||
for event in events {
|
||||
event_groups
|
||||
.entry(event.event_type())
|
||||
.or_default()
|
||||
.push(event);
|
||||
let extra_row = event.extra_row()?;
|
||||
let mut values = Vec::with_capacity(3 + extra_row.values.len());
|
||||
values.extend([
|
||||
ValueData::StringValue(event.event_type().to_string()).into(),
|
||||
ValueData::BinaryValue(event.json_payload()?.into_bytes()).into(),
|
||||
ValueData::TimestampNanosecondValue(event.timestamp().value()).into(),
|
||||
]);
|
||||
values.extend(extra_row.values);
|
||||
rows.push(Row { values });
|
||||
}
|
||||
|
||||
let mut row_insert_requests = RowInsertRequests {
|
||||
inserts: Vec::with_capacity(event_groups.len()),
|
||||
};
|
||||
|
||||
for (_, events) in event_groups {
|
||||
validate_events(&events)?;
|
||||
|
||||
// We already validated the events, so it's safe to get the first event to build the schema for the RowInsertRequest.
|
||||
let event = &events[0];
|
||||
let mut schema = vec![
|
||||
ColumnSchema {
|
||||
column_name: EVENTS_TABLE_TYPE_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::String.into(),
|
||||
semantic_type: SemanticType::Tag.into(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: EVENTS_TABLE_PAYLOAD_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::Binary as i32,
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
datatype_extension: Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
|
||||
}),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: EVENTS_TABLE_TIMESTAMP_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::TimestampNanosecond.into(),
|
||||
semantic_type: SemanticType::Timestamp.into(),
|
||||
..Default::default()
|
||||
},
|
||||
];
|
||||
schema.extend(event.extra_schema());
|
||||
|
||||
let rows = events
|
||||
.iter()
|
||||
.map(|event| {
|
||||
let mut row = Row {
|
||||
values: vec![
|
||||
ValueData::StringValue(event.event_type().to_string()).into(),
|
||||
ValueData::BinaryValue(event.json_payload()?.as_bytes().to_vec()).into(),
|
||||
ValueData::TimestampNanosecondValue(event.timestamp().value()).into(),
|
||||
],
|
||||
};
|
||||
row.values.extend(event.extra_row()?.values);
|
||||
Ok(row)
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
row_insert_requests.inserts.push(RowInsertRequest {
|
||||
table_name: DEFAULT_EVENTS_TABLE_NAME.to_string(),
|
||||
Ok(RowInsertRequests {
|
||||
inserts: vec![RowInsertRequest {
|
||||
table_name: event.table_name().to_string(),
|
||||
rows: Some(Rows { schema, rows }),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(row_insert_requests)
|
||||
}],
|
||||
})
|
||||
}
|
||||
|
||||
// Ensure the events with the same event type have the same extra schema.
|
||||
@@ -217,25 +202,59 @@ pub trait EventRecorder: Send + Sync + Debug + 'static {
|
||||
fn close(&self);
|
||||
}
|
||||
|
||||
/// EventHandlerOptions is the options for the event handler.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct EventHandlerOptions {
|
||||
/// TTL for the events table that will be used to store the events.
|
||||
pub ttl: Duration,
|
||||
/// Append mode for the events table that will be used to store the events.
|
||||
pub append_mode: bool,
|
||||
}
|
||||
|
||||
impl Default for EventHandlerOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
ttl: DEFAULT_EVENTS_TABLE_TTL,
|
||||
append_mode: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl EventHandlerOptions {
|
||||
/// Converts the options to the hints for the insert operation.
|
||||
pub fn to_hints(&self) -> Vec<(&str, String)> {
|
||||
vec![
|
||||
(TTL_KEY, format_duration(self.ttl).to_string()),
|
||||
(APPEND_MODE_KEY, self.append_mode.to_string()),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
/// EventHandler trait defines the interface for how to handle the event.
|
||||
#[async_trait]
|
||||
pub trait EventHandler: Send + Sync + 'static {
|
||||
/// Processes and handles incoming events. The [DefaultEventHandlerImpl] implementation forwards events to frontend instances for persistence.
|
||||
/// We use `&[Box<dyn Event>]` to avoid consuming the events, so the caller can buffer the events and retry if the handler fails.
|
||||
async fn handle(&self, events: &[Box<dyn Event>]) -> Result<()>;
|
||||
|
||||
/// Returns the handler options for the event type. We can use different options for different event types.
|
||||
fn options(&self, _event_type: &str) -> EventHandlerOptions {
|
||||
EventHandlerOptions::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration options for the event recorder.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct EventRecorderOptions {
|
||||
/// TTL for the events table that will be used to store the events.
|
||||
pub ttl: String,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub ttl: Duration,
|
||||
}
|
||||
|
||||
impl Default for EventRecorderOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
ttl: DEFAULT_EVENTS_TABLE_TTL.to_string(),
|
||||
ttl: DEFAULT_EVENTS_TABLE_TTL,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -252,9 +271,7 @@ pub struct EventRecorderImpl {
|
||||
}
|
||||
|
||||
impl EventRecorderImpl {
|
||||
pub fn new(event_handler: Box<dyn EventHandler>, opts: EventRecorderOptions) -> Self {
|
||||
info!("Creating event recorder with options: {:?}", opts);
|
||||
|
||||
pub fn new(event_handler: Box<dyn EventHandler>) -> Self {
|
||||
let (tx, rx) = channel(DEFAULT_CHANNEL_SIZE);
|
||||
let cancel_token = CancellationToken::new();
|
||||
|
||||
@@ -279,14 +296,6 @@ impl EventRecorderImpl {
|
||||
|
||||
recorder.handle = Some(handle);
|
||||
|
||||
// It only sets the ttl once, so it's safe to skip the error.
|
||||
if EVENTS_TABLE_TTL.set(opts.ttl.clone()).is_err() {
|
||||
info!(
|
||||
"Events table ttl already set to {}, skip setting it",
|
||||
opts.ttl
|
||||
);
|
||||
}
|
||||
|
||||
recorder
|
||||
}
|
||||
}
|
||||
@@ -471,10 +480,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_event_recorder() {
|
||||
let mut event_recorder = EventRecorderImpl::new(
|
||||
Box::new(TestEventHandlerImpl {}),
|
||||
EventRecorderOptions::default(),
|
||||
);
|
||||
let mut event_recorder = EventRecorderImpl::new(Box::new(TestEventHandlerImpl {}));
|
||||
event_recorder.record(Box::new(TestEvent {}));
|
||||
|
||||
// Sleep for a while to let the event be sent to the event handler.
|
||||
@@ -515,10 +521,8 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_event_recorder_should_panic() {
|
||||
let mut event_recorder = EventRecorderImpl::new(
|
||||
Box::new(TestEventHandlerImplShouldPanic {}),
|
||||
EventRecorderOptions::default(),
|
||||
);
|
||||
let mut event_recorder =
|
||||
EventRecorderImpl::new(Box::new(TestEventHandlerImplShouldPanic {}));
|
||||
|
||||
event_recorder.record(Box::new(TestEvent {}));
|
||||
|
||||
@@ -535,4 +539,135 @@ mod tests {
|
||||
assert!(handle.await.unwrap_err().is_panic());
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TestEventA {}
|
||||
|
||||
impl Event for TestEventA {
|
||||
fn event_type(&self) -> &str {
|
||||
"A"
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TestEventB {}
|
||||
|
||||
impl Event for TestEventB {
|
||||
fn table_name(&self) -> &str {
|
||||
"table_B"
|
||||
}
|
||||
|
||||
fn event_type(&self) -> &str {
|
||||
"B"
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TestEventC {}
|
||||
|
||||
impl Event for TestEventC {
|
||||
fn table_name(&self) -> &str {
|
||||
"table_C"
|
||||
}
|
||||
|
||||
fn event_type(&self) -> &str {
|
||||
"C"
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_group_events_by_type() {
|
||||
let events: Vec<Box<dyn Event>> = vec![
|
||||
Box::new(TestEventA {}),
|
||||
Box::new(TestEventB {}),
|
||||
Box::new(TestEventA {}),
|
||||
Box::new(TestEventC {}),
|
||||
Box::new(TestEventB {}),
|
||||
Box::new(TestEventC {}),
|
||||
Box::new(TestEventA {}),
|
||||
];
|
||||
|
||||
let event_groups = group_events_by_type(&events);
|
||||
assert_eq!(event_groups.len(), 3);
|
||||
assert_eq!(event_groups.get("A").unwrap().len(), 3);
|
||||
assert_eq!(event_groups.get("B").unwrap().len(), 2);
|
||||
assert_eq!(event_groups.get("C").unwrap().len(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_row_inserts_request() {
|
||||
let events: Vec<Box<dyn Event>> = vec![
|
||||
Box::new(TestEventA {}),
|
||||
Box::new(TestEventB {}),
|
||||
Box::new(TestEventA {}),
|
||||
Box::new(TestEventC {}),
|
||||
Box::new(TestEventB {}),
|
||||
Box::new(TestEventC {}),
|
||||
Box::new(TestEventA {}),
|
||||
];
|
||||
|
||||
let event_groups = group_events_by_type(&events);
|
||||
assert_eq!(event_groups.len(), 3);
|
||||
assert_eq!(event_groups.get("A").unwrap().len(), 3);
|
||||
assert_eq!(event_groups.get("B").unwrap().len(), 2);
|
||||
assert_eq!(event_groups.get("C").unwrap().len(), 2);
|
||||
|
||||
for (event_type, events) in event_groups {
|
||||
let row_inserts_request = build_row_inserts_request(&events).unwrap();
|
||||
if event_type == "A" {
|
||||
assert_eq!(row_inserts_request.inserts.len(), 1);
|
||||
assert_eq!(
|
||||
row_inserts_request.inserts[0].table_name,
|
||||
DEFAULT_EVENTS_TABLE_NAME
|
||||
);
|
||||
assert_eq!(
|
||||
row_inserts_request.inserts[0]
|
||||
.rows
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.rows
|
||||
.len(),
|
||||
3
|
||||
);
|
||||
} else if event_type == "B" {
|
||||
assert_eq!(row_inserts_request.inserts.len(), 1);
|
||||
assert_eq!(row_inserts_request.inserts[0].table_name, "table_B");
|
||||
assert_eq!(
|
||||
row_inserts_request.inserts[0]
|
||||
.rows
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.rows
|
||||
.len(),
|
||||
2
|
||||
);
|
||||
} else if event_type == "C" {
|
||||
assert_eq!(row_inserts_request.inserts.len(), 1);
|
||||
assert_eq!(row_inserts_request.inserts[0].table_name, "table_C");
|
||||
assert_eq!(
|
||||
row_inserts_request.inserts[0]
|
||||
.rows
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.rows
|
||||
.len(),
|
||||
2
|
||||
);
|
||||
} else {
|
||||
panic!("Unexpected event type: {}", event_type);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,13 +5,18 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
api.workspace = true
|
||||
async-trait.workspace = true
|
||||
common-error.workspace = true
|
||||
common-event-recorder.workspace = true
|
||||
common-grpc.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-time.workspace = true
|
||||
greptime-proto.workspace = true
|
||||
humantime.workspace = true
|
||||
meta-client.workspace = true
|
||||
serde.workspace = true
|
||||
session.workspace = true
|
||||
snafu.workspace = true
|
||||
tonic.workspace = true
|
||||
|
||||
@@ -12,17 +12,121 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use session::context::QueryContextRef;
|
||||
use std::any::Any;
|
||||
|
||||
#[derive(Debug)]
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::{ColumnDataType, ColumnSchema, Row, SemanticType};
|
||||
use common_event_recorder::error::Result;
|
||||
use common_event_recorder::Event;
|
||||
use serde::Serialize;
|
||||
|
||||
pub const SLOW_QUERY_TABLE_NAME: &str = "slow_queries";
|
||||
pub const SLOW_QUERY_TABLE_COST_COLUMN_NAME: &str = "cost";
|
||||
pub const SLOW_QUERY_TABLE_THRESHOLD_COLUMN_NAME: &str = "threshold";
|
||||
pub const SLOW_QUERY_TABLE_QUERY_COLUMN_NAME: &str = "query";
|
||||
pub const SLOW_QUERY_TABLE_TIMESTAMP_COLUMN_NAME: &str = "timestamp";
|
||||
pub const SLOW_QUERY_TABLE_IS_PROMQL_COLUMN_NAME: &str = "is_promql";
|
||||
pub const SLOW_QUERY_TABLE_PROMQL_START_COLUMN_NAME: &str = "promql_start";
|
||||
pub const SLOW_QUERY_TABLE_PROMQL_END_COLUMN_NAME: &str = "promql_end";
|
||||
pub const SLOW_QUERY_TABLE_PROMQL_RANGE_COLUMN_NAME: &str = "promql_range";
|
||||
pub const SLOW_QUERY_TABLE_PROMQL_STEP_COLUMN_NAME: &str = "promql_step";
|
||||
pub const SLOW_QUERY_EVENT_TYPE: &str = "slow_query";
|
||||
|
||||
/// SlowQueryEvent is the event of slow query.
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct SlowQueryEvent {
|
||||
pub cost: u64,
|
||||
pub threshold: u64,
|
||||
pub query: String,
|
||||
pub is_promql: bool,
|
||||
pub query_ctx: QueryContextRef,
|
||||
pub promql_range: Option<u64>,
|
||||
pub promql_step: Option<u64>,
|
||||
pub promql_start: Option<i64>,
|
||||
pub promql_end: Option<i64>,
|
||||
}
|
||||
|
||||
impl Event for SlowQueryEvent {
|
||||
fn table_name(&self) -> &str {
|
||||
SLOW_QUERY_TABLE_NAME
|
||||
}
|
||||
|
||||
fn event_type(&self) -> &str {
|
||||
SLOW_QUERY_EVENT_TYPE
|
||||
}
|
||||
|
||||
fn extra_schema(&self) -> Vec<ColumnSchema> {
|
||||
vec![
|
||||
ColumnSchema {
|
||||
column_name: SLOW_QUERY_TABLE_COST_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::Uint64.into(),
|
||||
semantic_type: SemanticType::Field.into(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: SLOW_QUERY_TABLE_THRESHOLD_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::Uint64.into(),
|
||||
semantic_type: SemanticType::Field.into(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: SLOW_QUERY_TABLE_QUERY_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::String.into(),
|
||||
semantic_type: SemanticType::Field.into(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: SLOW_QUERY_TABLE_IS_PROMQL_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::Boolean.into(),
|
||||
semantic_type: SemanticType::Field.into(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: SLOW_QUERY_TABLE_PROMQL_RANGE_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::Uint64.into(),
|
||||
semantic_type: SemanticType::Field.into(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: SLOW_QUERY_TABLE_PROMQL_STEP_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::Uint64.into(),
|
||||
semantic_type: SemanticType::Field.into(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: SLOW_QUERY_TABLE_PROMQL_START_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::TimestampMillisecond.into(),
|
||||
semantic_type: SemanticType::Field.into(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: SLOW_QUERY_TABLE_PROMQL_END_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::TimestampMillisecond.into(),
|
||||
semantic_type: SemanticType::Field.into(),
|
||||
..Default::default()
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn extra_row(&self) -> Result<Row> {
|
||||
Ok(Row {
|
||||
values: vec![
|
||||
ValueData::U64Value(self.cost).into(),
|
||||
ValueData::U64Value(self.threshold).into(),
|
||||
ValueData::StringValue(self.query.to_string()).into(),
|
||||
ValueData::BoolValue(self.is_promql).into(),
|
||||
ValueData::U64Value(self.promql_range.unwrap_or(0)).into(),
|
||||
ValueData::U64Value(self.promql_step.unwrap_or(0)).into(),
|
||||
ValueData::TimestampMillisecondValue(self.promql_start.unwrap_or(0)).into(),
|
||||
ValueData::TimestampMillisecondValue(self.promql_end.unwrap_or(0)).into(),
|
||||
],
|
||||
})
|
||||
}
|
||||
|
||||
fn json_payload(&self) -> Result<String> {
|
||||
Ok("".to_string())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
@@ -332,7 +332,7 @@ impl AggregateUDFImpl for StateWrapper {
|
||||
self.inner.signature()
|
||||
}
|
||||
|
||||
/// Coerce types also do nothing, as optimizer should be able to already make struct types
|
||||
/// Coerce types also do nothing, as optimzer should be able to already make struct types
|
||||
fn coerce_types(&self, arg_types: &[DataType]) -> datafusion_common::Result<Vec<DataType>> {
|
||||
self.inner.coerce_types(arg_types)
|
||||
}
|
||||
@@ -486,7 +486,7 @@ impl AggregateUDFImpl for MergeWrapper {
|
||||
&self.merge_signature
|
||||
}
|
||||
|
||||
/// Coerce types also do nothing, as optimizer should be able to already make struct types
|
||||
/// Coerce types also do nothing, as optimzer should be able to already make struct types
|
||||
fn coerce_types(&self, arg_types: &[DataType]) -> datafusion_common::Result<Vec<DataType>> {
|
||||
// just check if the arg_types are only one and is struct array
|
||||
if arg_types.len() != 1 || !matches!(arg_types.first(), Some(DataType::Struct(_))) {
|
||||
|
||||
@@ -63,7 +63,10 @@ pub struct Stat {
pub wcus: i64,
/// How many regions on this node
pub region_num: u64,
/// The region stats of the datanode.
pub region_stats: Vec<RegionStat>,
/// The topic stats of the datanode.
pub topic_stats: Vec<TopicStat>,
// The node epoch is used to check whether the node has restarted or redeployed.
pub node_epoch: u64,
/// The datanode workloads.
@@ -108,6 +111,24 @@ pub struct RegionStat {
pub metadata_topic_latest_entry_id: u64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopicStat {
/// The topic name.
pub topic: String,
/// The latest entry id of the topic.
pub latest_entry_id: u64,
/// The total size in bytes of records appended to the topic.
pub record_size: u64,
/// The total number of records appended to the topic.
pub record_num: u64,
}

/// Trait for reporting statistics about topics.
pub trait TopicStatsReporter: Send + Sync {
/// Returns a list of topic statistics that can be reported.
fn reportable_topics(&mut self) -> Vec<TopicStat>;
}

#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum RegionManifestInfo {
Mito {
@@ -203,6 +224,7 @@ impl TryFrom<&HeartbeatRequest> for Stat {
region_stats,
node_epoch,
node_workloads,
topic_stats,
..
} = value;

@@ -212,6 +234,7 @@ impl TryFrom<&HeartbeatRequest> for Stat {
.iter()
.map(RegionStat::from)
.collect::<Vec<_>>();
let topic_stats = topic_stats.iter().map(TopicStat::from).collect::<Vec<_>>();

let datanode_workloads = get_datanode_workloads(node_workloads.as_ref());
Ok(Self {
@@ -224,6 +247,7 @@ impl TryFrom<&HeartbeatRequest> for Stat {
wcus: region_stats.iter().map(|s| s.wcus).sum(),
region_num: region_stats.len() as u64,
region_stats,
topic_stats,
node_epoch: *node_epoch,
datanode_workloads,
})
@@ -286,6 +310,17 @@ impl From<&api::v1::meta::RegionStat> for RegionStat {
}
}

impl From<&api::v1::meta::TopicStat> for TopicStat {
fn from(value: &api::v1::meta::TopicStat) -> Self {
Self {
topic: value.topic_name.clone(),
latest_entry_id: value.latest_entry_id,
record_size: value.record_size,
record_num: value.record_num,
}
}
}

/// The key of the datanode stat in the memory store.
///
/// The format is `__meta_datanode_stat-0-{node_id}`.
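The `TopicStat` struct and `TopicStatsReporter` trait in the hunk above define how a datanode surfaces per-topic WAL statistics through its heartbeat. Below is a minimal sketch of a reporter backed by an in-memory map; `TopicStat` and `TopicStatsReporter` mirror the definitions shown in this diff (without the serde derives), while `InMemoryTopicStats` and its `record_append` helper are hypothetical names used only to illustrate the trait contract.

// Minimal sketch of a TopicStatsReporter implementation.
// `InMemoryTopicStats` is a hypothetical example type, not part of the diff.
use std::collections::HashMap;

#[derive(Debug, Clone)]
pub struct TopicStat {
    pub topic: String,
    pub latest_entry_id: u64,
    pub record_size: u64,
    pub record_num: u64,
}

pub trait TopicStatsReporter: Send + Sync {
    /// Returns a list of topic statistics that can be reported.
    fn reportable_topics(&mut self) -> Vec<TopicStat>;
}

/// Hypothetical reporter that accumulates per-topic counters in memory.
#[derive(Default)]
pub struct InMemoryTopicStats {
    stats: HashMap<String, TopicStat>,
}

impl InMemoryTopicStats {
    /// Records an appended batch for `topic`.
    pub fn record_append(&mut self, topic: &str, entry_id: u64, bytes: u64, records: u64) {
        let stat = self.stats.entry(topic.to_string()).or_insert_with(|| TopicStat {
            topic: topic.to_string(),
            latest_entry_id: 0,
            record_size: 0,
            record_num: 0,
        });
        stat.latest_entry_id = stat.latest_entry_id.max(entry_id);
        stat.record_size += bytes;
        stat.record_num += records;
    }
}

impl TopicStatsReporter for InMemoryTopicStats {
    fn reportable_topics(&mut self) -> Vec<TopicStat> {
        self.stats.values().cloned().collect()
    }
}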
@@ -68,6 +68,7 @@ impl CreateLogicalTablesProcedure {
physical_table_id,
physical_region_numbers: vec![],
physical_columns: vec![],
physical_partition_columns: vec![],
},
}
}
@@ -91,6 +92,8 @@ impl CreateLogicalTablesProcedure {
self.check_input_tasks()?;
// Sets physical region numbers
self.fill_physical_table_info().await?;
// Add partition columns from physical table to logical table schemas
self.merge_partition_columns_into_logical_tables()?;
// Checks if the tables exist
self.check_tables_already_exist().await?;

@@ -257,6 +260,7 @@ pub struct CreateTablesData {
physical_table_id: TableId,
physical_region_numbers: Vec<RegionNumber>,
physical_columns: Vec<ColumnMetadata>,
physical_partition_columns: Vec<String>,
}

impl CreateTablesData {

@@ -12,6 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashSet;

use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema};
use snafu::OptionExt;

use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure;
use crate::error::Result;
use crate::key::table_route::TableRouteValue;
@@ -28,6 +34,89 @@ impl CreateLogicalTablesProcedure {

self.data.physical_region_numbers = physical_region_numbers;

// Extract partition column names from the physical table
let physical_table_info = self
.context
.table_metadata_manager
.table_info_manager()
.get(self.data.physical_table_id)
.await?
.with_context(|| crate::error::TableInfoNotFoundSnafu {
table: format!("physical table {}", self.data.physical_table_id),
})?;

let physical_partition_columns: Vec<String> = physical_table_info
.table_info
.meta
.partition_key_indices
.iter()
.map(|&idx| {
physical_table_info.table_info.meta.schema.column_schemas[idx]
.name
.clone()
})
.collect();

self.data.physical_partition_columns = physical_partition_columns;

Ok(())
}

pub(crate) fn merge_partition_columns_into_logical_tables(&mut self) -> Result<()> {
let partition_columns = &self.data.physical_partition_columns;

// Skip if no partition columns to add
if partition_columns.is_empty() {
return Ok(());
}

for task in &mut self.data.tasks {
// Get existing column names in the logical table
let existing_column_names: HashSet<_> = task
.table_info
.meta
.schema
.column_schemas
.iter()
.map(|c| &c.name)
.collect();

let mut new_columns = Vec::new();
let mut new_primary_key_indices = task.table_info.meta.primary_key_indices.clone();

// Add missing partition columns
for partition_column in partition_columns {
if !existing_column_names.contains(partition_column) {
let new_column_index =
task.table_info.meta.schema.column_schemas.len() + new_columns.len();

// Create new column schema for the partition column
let column_schema = ColumnSchema::new(
partition_column.clone(),
ConcreteDataType::string_datatype(),
true,
);
new_columns.push(column_schema);

// Add to primary key indices (partition columns are part of primary key)
new_primary_key_indices.push(new_column_index);
}
}

// If we added new columns, update the table info
if !new_columns.is_empty() {
let mut updated_columns = task.table_info.meta.schema.column_schemas.clone();
updated_columns.extend(new_columns);

// Create new schema with updated columns
let new_schema = RawSchema::new(updated_columns);

// Update the table info
task.table_info.meta.schema = new_schema;
task.table_info.meta.primary_key_indices = new_primary_key_indices;
}
}

Ok(())
}

@@ -19,9 +19,12 @@ use api::v1::CreateTableExpr;
use common_telemetry::debug;
use common_telemetry::tracing_context::TracingContext;
use store_api::storage::{RegionId, TableId};
use table::metadata::RawTableInfo;

use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure;
use crate::ddl::create_table_template::{build_template, CreateRequestBuilder};
use crate::ddl::create_table_template::{
build_template, build_template_from_raw_table_info, CreateRequestBuilder,
};
use crate::ddl::utils::region_storage_path;
use crate::error::Result;
use crate::peer::Peer;
@@ -47,8 +50,10 @@ impl CreateLogicalTablesProcedure {
let logical_table_id = task.table_info.ident.table_id;
let physical_table_id = self.data.physical_table_id;
let storage_path = region_storage_path(catalog, schema);
let request_builder =
create_region_request_builder(&task.create_table, physical_table_id)?;
let request_builder = create_region_request_builder_from_raw_table_info(
&task.table_info,
physical_table_id,
)?;

for region_number in &regions_on_this_peer {
let region_id = RegionId::new(logical_table_id, *region_number);
@@ -73,7 +78,7 @@ impl CreateLogicalTablesProcedure {
}
}

/// Creates a region request builder.
/// Creates a region request builder
pub fn create_region_request_builder(
create_table_expr: &CreateTableExpr,
physical_table_id: TableId,
@@ -81,3 +86,14 @@ pub fn create_region_request_builder(
let template = build_template(create_table_expr)?;
Ok(CreateRequestBuilder::new(template, Some(physical_table_id)))
}

/// Builds a [CreateRequestBuilder] from a [RawTableInfo].
///
/// Note: **This method is only used for creating logical tables.**
pub fn create_region_request_builder_from_raw_table_info(
raw_table_info: &RawTableInfo,
physical_table_id: TableId,
) -> Result<CreateRequestBuilder> {
let template = build_template_from_raw_table_info(raw_table_info)?;
Ok(CreateRequestBuilder::new(template, Some(physical_table_id)))
}
@@ -12,10 +12,25 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::OnceLock;
use std::time::Duration;

pub const BASE_HEARTBEAT_INTERVAL: Duration = Duration::from_secs(3);
/// Heartbeat interval time (is the basic unit of various time).
pub const HEARTBEAT_INTERVAL_MILLIS: u64 = 3000;

/// The frontend will also send heartbeats to Metasrv, sending an empty
/// heartbeat every HEARTBEAT_INTERVAL_MILLIS * 6 seconds.
pub const FRONTEND_HEARTBEAT_INTERVAL_MILLIS: u64 = HEARTBEAT_INTERVAL_MILLIS * 6;

/// The lease seconds of a region. It's set by 3 heartbeat intervals
/// (HEARTBEAT_INTERVAL_MILLIS × 3), plus some extra buffer (1 second).
pub const REGION_LEASE_SECS: u64 =
Duration::from_millis(HEARTBEAT_INTERVAL_MILLIS * 3).as_secs() + 1;

/// When creating table or region failover, a target node needs to be selected.
/// If the node's lease has expired, the `Selector` will not select it.
pub const DATANODE_LEASE_SECS: u64 = REGION_LEASE_SECS;

pub const FLOWNODE_LEASE_SECS: u64 = DATANODE_LEASE_SECS;

/// The lease seconds of metasrv leader.
pub const META_LEASE_SECS: u64 = 5;
@@ -26,73 +41,11 @@ pub const POSTGRES_KEEP_ALIVE_SECS: u64 = 30;
/// In a lease, there are two opportunities for renewal.
pub const META_KEEP_ALIVE_INTERVAL_SECS: u64 = META_LEASE_SECS / 2;

/// The timeout of the heartbeat request.
pub const HEARTBEAT_TIMEOUT: Duration = Duration::from_secs(META_KEEP_ALIVE_INTERVAL_SECS + 1);

/// The keep-alive interval of the heartbeat channel.
pub const HEARTBEAT_CHANNEL_KEEP_ALIVE_INTERVAL_SECS: Duration =
Duration::from_secs(META_KEEP_ALIVE_INTERVAL_SECS + 1);

/// The keep-alive timeout of the heartbeat channel.
pub const HEARTBEAT_CHANNEL_KEEP_ALIVE_TIMEOUT_SECS: Duration =
Duration::from_secs(META_KEEP_ALIVE_INTERVAL_SECS + 1);

/// The default mailbox round-trip timeout.
pub const MAILBOX_RTT_SECS: u64 = 1;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// The distributed time constants.
pub struct DistributedTimeConstants {
pub heartbeat_interval: Duration,
pub frontend_heartbeat_interval: Duration,
pub region_lease: Duration,
pub datanode_lease: Duration,
pub flownode_lease: Duration,
}
/// The interval of reporting topic stats.
pub const TOPIC_STATS_REPORT_INTERVAL_SECS: u64 = 15;

/// The frontend heartbeat interval is 6 times of the base heartbeat interval.
pub fn frontend_heartbeat_interval(base_heartbeat_interval: Duration) -> Duration {
base_heartbeat_interval * 6
}

impl DistributedTimeConstants {
/// Create a new DistributedTimeConstants from the heartbeat interval.
pub fn from_heartbeat_interval(heartbeat_interval: Duration) -> Self {
let region_lease = heartbeat_interval * 3 + Duration::from_secs(1);
let datanode_lease = region_lease;
let flownode_lease = datanode_lease;
Self {
heartbeat_interval,
frontend_heartbeat_interval: frontend_heartbeat_interval(heartbeat_interval),
region_lease,
datanode_lease,
flownode_lease,
}
}
}

impl Default for DistributedTimeConstants {
fn default() -> Self {
Self::from_heartbeat_interval(BASE_HEARTBEAT_INTERVAL)
}
}

static DEFAULT_DISTRIBUTED_TIME_CONSTANTS: OnceLock<DistributedTimeConstants> = OnceLock::new();

/// Get the default distributed time constants.
pub fn default_distributed_time_constants() -> &'static DistributedTimeConstants {
DEFAULT_DISTRIBUTED_TIME_CONSTANTS.get_or_init(Default::default)
}

/// Initialize the default distributed time constants.
pub fn init_distributed_time_constants(base_heartbeat_interval: Duration) {
let distributed_time_constants =
DistributedTimeConstants::from_heartbeat_interval(base_heartbeat_interval);
DEFAULT_DISTRIBUTED_TIME_CONSTANTS
.set(distributed_time_constants)
.expect("Failed to set default distributed time constants");
common_telemetry::info!(
"Initialized default distributed time constants: {:#?}",
distributed_time_constants
);
}
/// The retention seconds of topic stats.
pub const TOPIC_STATS_RETENTION_SECS: u64 = TOPIC_STATS_REPORT_INTERVAL_SECS * 100;
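The hunk above derives lease durations from the base heartbeat interval: the region (and datanode/flownode) lease is 3 heartbeats plus 1 second of buffer, and the frontend heartbeat interval is 6x the base. A small standalone sketch of the same arithmetic, using only std::time::Duration, is shown below; it mirrors the logic of `from_heartbeat_interval` from this diff rather than calling the crate itself, so the function name here is illustrative.

use std::time::Duration;

// Mirrors the lease arithmetic of `DistributedTimeConstants::from_heartbeat_interval`:
// region lease = 3 heartbeats + 1s buffer, frontend heartbeat = 6x the base interval.
fn lease_durations(heartbeat_interval: Duration) -> (Duration, Duration) {
    let region_lease = heartbeat_interval * 3 + Duration::from_secs(1);
    let frontend_heartbeat_interval = heartbeat_interval * 6;
    (region_lease, frontend_heartbeat_interval)
}

fn main() {
    // With the default 3s base heartbeat interval, the region/datanode lease
    // comes out to 10s and the frontend heartbeat interval to 18s.
    let (region_lease, frontend) = lease_durations(Duration::from_secs(3));
    assert_eq!(region_lease, Duration::from_secs(10));
    assert_eq!(frontend, Duration::from_secs(18));
}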
@@ -375,6 +375,13 @@ pub enum Error {
location: Location,
},

#[snafu(display("Region not found: {}", region_id))]
RegionNotFound {
region_id: RegionId,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("View not found: '{}'", view_name))]
ViewNotFound {
view_name: String,
@@ -528,6 +535,9 @@ pub enum Error {
source: common_wal::error::Error,
},

#[snafu(display("Failed to resolve Kafka broker endpoint."))]
ResolveKafkaEndpoint { source: common_wal::error::Error },

#[snafu(display("Failed to build a Kafka controller client"))]
BuildKafkaCtrlClient {
#[snafu(implicit)]
@@ -984,6 +994,39 @@ pub enum Error {
table_name: String,
table_id: TableId,
},

#[snafu(display(
"Column not found in column metadata, column_name: {}, column_id: {}",
column_name,
column_id
))]
ColumnNotFound { column_name: String, column_id: u32 },

#[snafu(display(
"Column id mismatch, column_name: {}, expected column_id: {}, actual column_id: {}",
column_name,
expected_column_id,
actual_column_id
))]
ColumnIdMismatch {
column_name: String,
expected_column_id: u32,
actual_column_id: u32,
},

#[snafu(display(
"Timestamp column mismatch, expected column_name: {}, expected column_id: {}, actual column_name: {}, actual column_id: {}",
expected_column_name,
expected_column_id,
actual_column_name,
actual_column_id,
))]
TimestampMismatch {
expected_column_name: String,
expected_column_id: u32,
actual_column_name: String,
actual_column_id: u32,
},
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -1009,7 +1052,10 @@ impl ErrorExt for Error {
| MissingColumnIds { .. }
| MissingColumnInColumnMetadata { .. }
| MismatchColumnId { .. }
| ColumnMetadataConflicts { .. } => StatusCode::Unexpected,
| ColumnMetadataConflicts { .. }
| ColumnNotFound { .. }
| ColumnIdMismatch { .. }
| TimestampMismatch { .. } => StatusCode::Unexpected,

Unsupported { .. } => StatusCode::Unsupported,
WriteObject { .. } | ReadObject { .. } => StatusCode::StorageUnavailable,
@@ -1037,6 +1083,7 @@ impl ErrorExt for Error {
| BuildKafkaClient { .. }
| BuildKafkaCtrlClient { .. }
| KafkaPartitionClient { .. }
| ResolveKafkaEndpoint { .. }
| ProduceRecord { .. }
| CreateKafkaWalTopic { .. }
| EmptyTopicPool { .. }
@@ -1076,7 +1123,9 @@ impl ErrorExt for Error {
FlowRouteNotFound { .. } => StatusCode::Unexpected,
FlowAlreadyExists { .. } => StatusCode::FlowAlreadyExists,

ViewNotFound { .. } | TableNotFound { .. } => StatusCode::TableNotFound,
ViewNotFound { .. } | TableNotFound { .. } | RegionNotFound { .. } => {
StatusCode::TableNotFound
}
ViewAlreadyExists { .. } | TableAlreadyExists { .. } => StatusCode::TableAlreadyExists,

SubmitProcedure { source, .. }
@@ -22,8 +22,8 @@ use store_api::storage::{RegionId, RegionNumber};
use table::metadata::TableId;

use crate::error::{
InvalidMetadataSnafu, MetadataCorruptionSnafu, Result, SerdeJsonSnafu, TableRouteNotFoundSnafu,
UnexpectedLogicalRouteTableSnafu,
InvalidMetadataSnafu, MetadataCorruptionSnafu, RegionNotFoundSnafu, Result, SerdeJsonSnafu,
TableRouteNotFoundSnafu, UnexpectedLogicalRouteTableSnafu,
};
use crate::key::node_address::{NodeAddressKey, NodeAddressValue};
use crate::key::txn_helper::TxnOpGetResponseSet;
@@ -455,6 +455,101 @@ impl TableRouteManager {
.transpose()
}

/// Sets the staging state for a specific region.
///
/// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if:
/// - the table does not exist
/// - the region is not found in the table
pub async fn set_region_staging_state(
&self,
region_id: store_api::storage::RegionId,
staging: bool,
) -> Result<()> {
let table_id = region_id.table_id();

// Get current table route with raw bytes for CAS operation
let current_table_route = self
.storage
.get_with_raw_bytes(table_id)
.await?
.context(TableRouteNotFoundSnafu { table_id })?;

// Clone the current route value and update the specific region
let new_table_route = current_table_route.inner.clone();

// Only physical tables have region routes
ensure!(
new_table_route.is_physical(),
UnexpectedLogicalRouteTableSnafu {
err_msg: format!("Cannot set staging state for logical table {table_id}"),
}
);

let region_routes = new_table_route.region_routes()?.clone();
let mut updated_routes = region_routes.clone();

// Find and update the specific region
// TODO(ruihang): maybe update them in one transaction
let mut region_found = false;
for route in &mut updated_routes {
if route.region.id == region_id {
if staging {
route.set_leader_staging();
} else {
route.clear_leader_staging();
}
region_found = true;
break;
}
}

ensure!(region_found, RegionNotFoundSnafu { region_id });

// Create new table route with updated region routes
let updated_table_route = new_table_route.update(updated_routes)?;

// Execute atomic update
let (txn, _) =
self.storage
.build_update_txn(table_id, &current_table_route, &updated_table_route)?;

let result = self.storage.kv_backend.txn(txn).await?;

ensure!(
result.succeeded,
MetadataCorruptionSnafu {
err_msg: format!(
"Failed to update staging state for region {}: CAS operation failed",
region_id
),
}
);

Ok(())
}

/// Checks if a specific region is in staging state.
///
/// Returns false if the table/region doesn't exist.
pub async fn is_region_staging(&self, region_id: store_api::storage::RegionId) -> Result<bool> {
let table_id = region_id.table_id();

let table_route = self.storage.get(table_id).await?;

match table_route {
Some(route) if route.is_physical() => {
let region_routes = route.region_routes()?;
for route in region_routes {
if route.region.id == region_id {
return Ok(route.is_leader_staging());
}
}
Ok(false)
}
_ => Ok(false),
}
}

/// Returns low-level APIs.
pub fn table_route_storage(&self) -> &TableRouteStorage {
&self.storage
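`set_region_staging_state` above follows a read-modify-CAS pattern: load the route value with its raw bytes, flip the staging flag on the matching region route, then write it back with a compare-and-swap transaction. The in-memory part of that flow is sketched below on a simplified stand-in type; `RegionRoute` and `set_staging` here are hypothetical local names for illustration only, not the real store_api types or the method itself.

// Simplified stand-in for the route toggle inside `set_region_staging_state`.
// `RegionRoute` here is a hypothetical local struct, not the real type.
#[derive(Debug, Clone, PartialEq)]
struct RegionRoute {
    region_id: u64,
    leader_staging: bool,
}

/// Flips the staging flag on the matching region; returns false when the region
/// is absent, mirroring the `RegionNotFound` error path in the diff.
fn set_staging(routes: &mut [RegionRoute], region_id: u64, staging: bool) -> bool {
    for route in routes.iter_mut() {
        if route.region_id == region_id {
            route.leader_staging = staging;
            return true;
        }
    }
    false
}

fn main() {
    let mut routes = vec![
        RegionRoute { region_id: 1, leader_staging: false },
        RegionRoute { region_id: 2, leader_staging: false },
    ];
    assert!(set_staging(&mut routes, 2, true));
    assert!(routes[1].leader_staging);
    // Unknown region: the real method returns a RegionNotFound error instead.
    assert!(!set_staging(&mut routes, 9, true));
}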
@@ -46,6 +46,7 @@ pub mod rpc;
pub mod sequence;
pub mod snapshot;
pub mod state_store;
pub mod stats;
#[cfg(any(test, feature = "testing"))]
pub mod test_util;
pub mod util;

@@ -15,6 +15,13 @@
use lazy_static::lazy_static;
use prometheus::*;

pub const TABLE_TYPE_PHYSICAL: &str = "physical";
pub const TABLE_TYPE_LOGICAL: &str = "logical";
pub const ERROR_TYPE_RETRYABLE: &str = "retryable";
pub const ERROR_TYPE_EXTERNAL: &str = "external";
pub const STATS_TYPE_NO_REGION_METADATA: &str = "no_region_metadata";
pub const STATS_TYPE_REGION_NOT_OPEN: &str = "region_not_open";

lazy_static! {
pub static ref METRIC_META_TXN_REQUEST: HistogramVec = register_histogram_vec!(
"greptime_meta_txn_request",
@@ -114,4 +121,39 @@ lazy_static! {
&["backend", "result", "op", "type"]
)
.unwrap();
pub static ref METRIC_META_RECONCILIATION_LIST_REGION_METADATA_DURATION: HistogramVec =
register_histogram_vec!(
"greptime_meta_reconciliation_list_region_metadata_duration",
"reconciliation list region metadata duration",
&["table_type"]
)
.unwrap();
pub static ref METRIC_META_RECONCILIATION_RESOLVED_COLUMN_METADATA: IntCounterVec =
register_int_counter_vec!(
"greptime_meta_reconciliation_resolved_column_metadata",
"reconciliation resolved column metadata",
&["strategy"]
)
.unwrap();
pub static ref METRIC_META_RECONCILIATION_STATS: IntCounterVec =
register_int_counter_vec!(
"greptime_meta_reconciliation_stats",
"reconciliation stats",
&["procedure_name", "table_type", "type"]
)
.unwrap();
pub static ref METRIC_META_RECONCILIATION_PROCEDURE: HistogramVec =
register_histogram_vec!(
"greptime_meta_reconciliation_procedure",
"reconcile table procedure",
&["procedure_name", "step"]
)
.unwrap();
pub static ref METRIC_META_RECONCILIATION_PROCEDURE_ERROR: IntCounterVec =
register_int_counter_vec!(
"greptime_meta_reconciliation_procedure_error",
"reconciliation procedure error",
&["procedure_name", "step", "error_type"]
)
.unwrap();
}
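The reconciliation metrics above are labeled HistogramVec/IntCounterVec instances; each call site picks label values and either holds a timer guard or bumps a counter, as later hunks in this diff do. Below is a minimal standalone sketch of that usage pattern with the prometheus and lazy_static crates; the metric name and label values here are illustrative only, not the ones registered above.

// Minimal sketch of using a labeled histogram the way the reconciliation code does:
// pick label values, hold the timer guard for the measured scope.
use lazy_static::lazy_static;
use prometheus::{register_histogram_vec, HistogramVec};

lazy_static! {
    static ref EXAMPLE_STEP_DURATION: HistogramVec = register_histogram_vec!(
        "example_step_duration_seconds",
        "duration of an example reconciliation step",
        &["procedure_name", "step"]
    )
    .unwrap();
}

fn run_step() {
    // The guard observes the elapsed time when it is dropped at the end of this scope.
    let _timer = EXAMPLE_STEP_DURATION
        .with_label_values(&["ReconcileTable", "start"])
        .start_timer();
    // ... do the work being measured ...
}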
@@ -18,3 +18,4 @@ pub(crate) mod reconcile_database;
pub(crate) mod reconcile_logical_tables;
pub(crate) mod reconcile_table;
pub(crate) mod utils;
pub use reconcile_table::resolve_column_metadata::ResolveStrategy;

@@ -117,7 +117,8 @@ impl ReconciliationManager {
.await?;

if physical_table_id == table_id {
Ok(self.reconcile_physical_table(table_id, table_ref.into(), resolve_strategy))
self.reconcile_physical_table(table_id, table_ref.into(), resolve_strategy)
.await
} else {
let physical_table_info = table_metadata_manager
.table_info_manager()
@@ -127,24 +128,25 @@ impl ReconciliationManager {
table_name: format!("table_id: {}", physical_table_id),
})?;

Ok(self.reconcile_logical_tables(
self.reconcile_logical_tables(
physical_table_id,
physical_table_info.table_name(),
vec![(table_id, table_ref.into())],
))
)
.await
}
}

/// Reconcile a database.
///
/// Returns the procedure id of the reconciliation procedure.
pub fn reconcile_database(
pub async fn reconcile_database(
&self,
catalog: String,
schema: String,
resolve_strategy: ResolveStrategy,
parallelism: usize,
) -> ProcedureId {
) -> Result<ProcedureId> {
let parallelism = normalize_parallelism(parallelism);
let procedure = ReconcileDatabaseProcedure::new(
self.context.clone(),
@@ -155,15 +157,15 @@ impl ReconciliationManager {
resolve_strategy,
false,
);
self.spawn_procedure(Box::new(procedure))
self.spawn_procedure(Box::new(procedure)).await
}

fn reconcile_physical_table(
async fn reconcile_physical_table(
&self,
table_id: TableId,
table_name: TableName,
resolve_strategy: ResolveStrategy,
) -> ProcedureId {
) -> Result<ProcedureId> {
let procedure = ReconcileTableProcedure::new(
self.context.clone(),
table_id,
@@ -171,15 +173,15 @@ impl ReconciliationManager {
resolve_strategy,
false,
);
self.spawn_procedure(Box::new(procedure))
self.spawn_procedure(Box::new(procedure)).await
}

fn reconcile_logical_tables(
async fn reconcile_logical_tables(
&self,
physical_table_id: TableId,
physical_table_name: TableName,
logical_tables: Vec<(TableId, TableName)>,
) -> ProcedureId {
) -> Result<ProcedureId> {
let procedure = ReconcileLogicalTablesProcedure::new(
self.context.clone(),
physical_table_id,
@@ -187,18 +189,18 @@ impl ReconciliationManager {
logical_tables,
false,
);
self.spawn_procedure(Box::new(procedure))
self.spawn_procedure(Box::new(procedure)).await
}

/// Reconcile a catalog.
///
/// Returns the procedure id of the reconciliation procedure.
pub fn reconcile_catalog(
pub async fn reconcile_catalog(
&self,
catalog: String,
resolve_strategy: ResolveStrategy,
parallelism: usize,
) -> ProcedureId {
) -> Result<ProcedureId> {
let parallelism = normalize_parallelism(parallelism);
let procedure = ReconcileCatalogProcedure::new(
self.context.clone(),
@@ -207,29 +209,26 @@ impl ReconciliationManager {
resolve_strategy,
parallelism,
);
self.spawn_procedure(Box::new(procedure))
self.spawn_procedure(Box::new(procedure)).await
}

fn spawn_procedure(&self, procedure: BoxedProcedure) -> ProcedureId {
async fn spawn_procedure(&self, procedure: BoxedProcedure) -> Result<ProcedureId> {
let procedure_manager = self.procedure_manager.clone();
let procedure_with_id = ProcedureWithId::with_random_id(procedure);
let procedure_id = procedure_with_id.id;
let mut watcher = procedure_manager
.submit(procedure_with_id)
.await
.context(error::SubmitProcedureSnafu)?;
common_runtime::spawn_global(async move {
let watcher = &mut match procedure_manager.submit(procedure_with_id).await {
Ok(watcher) => watcher,
Err(e) => {
error!(e; "Failed to submit reconciliation procedure {procedure_id}");
return;
}
};
if let Err(e) = watcher::wait(watcher).await {
if let Err(e) = watcher::wait(&mut watcher).await {
error!(e; "Failed to wait reconciliation procedure {procedure_id}");
return;
}

info!("Reconciliation procedure {procedure_id} is finished successfully!");
});
procedure_id
Ok(procedure_id)
}
}
@@ -14,10 +14,11 @@

use std::any::Any;
use std::fmt::Debug;
use std::time::Instant;

use common_procedure::error::FromJsonSnafu;
use common_procedure::{
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure, ProcedureId,
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure,
Result as ProcedureResult, Status,
};
use futures::stream::BoxStream;
@@ -28,11 +29,13 @@ use crate::cache_invalidator::CacheInvalidatorRef;
use crate::error::Result;
use crate::key::TableMetadataManagerRef;
use crate::lock_key::CatalogLock;
use crate::metrics;
use crate::node_manager::NodeManagerRef;
use crate::reconciliation::reconcile_catalog::start::ReconcileCatalogStart;
use crate::reconciliation::reconcile_database::utils::wait_for_inflight_subprocedures;
use crate::reconciliation::reconcile_table::resolve_column_metadata::ResolveStrategy;
use crate::reconciliation::utils::Context;
use crate::reconciliation::utils::{
wait_for_inflight_subprocedures, Context, ReconcileCatalogMetrics, SubprocedureMeta,
};

pub(crate) mod end;
pub(crate) mod reconcile_databases;
@@ -61,13 +64,15 @@ impl ReconcileCatalogContext {
&mut self,
procedure_ctx: &ProcedureContext,
) -> Result<()> {
if let Some(procedure_id) = self.volatile_ctx.inflight_subprocedure {
wait_for_inflight_subprocedures(
if let Some(subprocedure) = self.volatile_ctx.inflight_subprocedure.take() {
let subprocedures = [subprocedure];
let result = wait_for_inflight_subprocedures(
procedure_ctx,
&[procedure_id],
&subprocedures,
self.persistent_ctx.fast_fail,
)
.await?;
self.volatile_ctx.metrics += result.into();
}
Ok(())
}
@@ -97,12 +102,26 @@ impl PersistentContext {
}
}

#[derive(Default)]
pub(crate) struct VolatileContext {
/// Stores the stream of catalogs.
schemas: Option<BoxStream<'static, Result<String>>>,
/// Stores the inflight subprocedure.
inflight_subprocedure: Option<ProcedureId>,
inflight_subprocedure: Option<SubprocedureMeta>,
/// Stores the metrics of reconciling catalog.
metrics: ReconcileCatalogMetrics,
/// The start time of the reconciliation.
start_time: Instant,
}

impl Default for VolatileContext {
fn default() -> Self {
Self {
schemas: None,
inflight_subprocedure: None,
metrics: Default::default(),
start_time: Instant::now(),
}
}
}

pub struct ReconcileCatalogProcedure {
@@ -158,6 +177,11 @@ impl Procedure for ReconcileCatalogProcedure {
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
let state = &mut self.state;

let procedure_name = Self::TYPE_NAME;
let step = state.name();
let _timer = metrics::METRIC_META_RECONCILIATION_PROCEDURE
.with_label_values(&[procedure_name, step])
.start_timer();
match state.next(&mut self.context, _ctx).await {
Ok((next, status)) => {
*state = next;
@@ -165,8 +189,14 @@ impl Procedure for ReconcileCatalogProcedure {
}
Err(e) => {
if e.is_retry_later() {
metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
.with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_RETRYABLE])
.inc();
Err(ProcedureError::retry_later(e))
} else {
metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
.with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_EXTERNAL])
.inc();
Err(ProcedureError::external(e))
}
}

@@ -15,6 +15,7 @@
use std::any::Any;

use common_procedure::{Context as ProcedureContext, Status};
use common_telemetry::info;
use serde::{Deserialize, Serialize};

use crate::error::Result;
@@ -28,9 +29,16 @@ pub(crate) struct ReconcileCatalogEnd;
impl State for ReconcileCatalogEnd {
async fn next(
&mut self,
_ctx: &mut ReconcileCatalogContext,
_procedure_ctx: &ProcedureContext,
ctx: &mut ReconcileCatalogContext,
procedure_ctx: &ProcedureContext,
) -> Result<(Box<dyn State>, Status)> {
info!(
"Catalog reconciliation completed. catalog: {}, procedure_id: {}, metrics: {}, elapsed: {:?}",
ctx.persistent_ctx.catalog,
procedure_ctx.procedure_id,
ctx.volatile_ctx.metrics,
ctx.volatile_ctx.start_time.elapsed()
);
Ok((Box::new(ReconcileCatalogEnd), Status::done()))
}
@@ -23,7 +23,7 @@ use crate::error::Result;
use crate::reconciliation::reconcile_catalog::end::ReconcileCatalogEnd;
use crate::reconciliation::reconcile_catalog::{ReconcileCatalogContext, State};
use crate::reconciliation::reconcile_database::ReconcileDatabaseProcedure;
use crate::reconciliation::utils::Context;
use crate::reconciliation::utils::{Context, SubprocedureMeta};

#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct ReconcileDatabases;
@@ -83,13 +83,18 @@ impl ReconcileDatabases {
let procedure = ReconcileDatabaseProcedure::new(
context,
ctx.persistent_ctx.catalog.clone(),
schema,
schema.clone(),
ctx.persistent_ctx.fast_fail,
ctx.persistent_ctx.parallelism,
ctx.persistent_ctx.resolve_strategy,
true,
);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
ctx.volatile_ctx.inflight_subprocedure = Some(SubprocedureMeta::new_reconcile_database(
procedure_with_id.id,
ctx.persistent_ctx.catalog.clone(),
schema,
));

Ok((
Box::new(ReconcileDatabases),
@@ -16,16 +16,16 @@ pub(crate) mod end;
pub(crate) mod reconcile_logical_tables;
pub(crate) mod reconcile_tables;
pub(crate) mod start;
pub(crate) mod utils;

use std::any::Any;
use std::collections::HashMap;
use std::fmt::Debug;
use std::time::Instant;

use async_trait::async_trait;
use common_procedure::error::{FromJsonSnafu, ToJsonSnafu};
use common_procedure::{
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure, ProcedureId,
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure,
Result as ProcedureResult, Status,
};
use futures::stream::BoxStream;
@@ -39,12 +39,13 @@ use crate::error::Result;
use crate::key::table_name::TableNameValue;
use crate::key::TableMetadataManagerRef;
use crate::lock_key::{CatalogLock, SchemaLock};
use crate::metrics;
use crate::node_manager::NodeManagerRef;
use crate::reconciliation::reconcile_database::start::ReconcileDatabaseStart;
use crate::reconciliation::reconcile_database::utils::wait_for_inflight_subprocedures;
use crate::reconciliation::reconcile_table::resolve_column_metadata::ResolveStrategy;
use crate::reconciliation::utils::Context;

use crate::reconciliation::utils::{
wait_for_inflight_subprocedures, Context, ReconcileDatabaseMetrics, SubprocedureMeta,
};
pub(crate) const DEFAULT_PARALLELISM: usize = 64;

pub(crate) struct ReconcileDatabaseContext {
@@ -66,22 +67,32 @@ impl ReconcileDatabaseContext {
}
}

/// Waits for inflight subprocedures to complete.
pub(crate) async fn wait_for_inflight_subprocedures(
&mut self,
procedure_ctx: &ProcedureContext,
) -> Result<()> {
if !self.volatile_ctx.inflight_subprocedures.is_empty() {
wait_for_inflight_subprocedures(
let result = wait_for_inflight_subprocedures(
procedure_ctx,
&self.volatile_ctx.inflight_subprocedures,
self.persistent_ctx.fail_fast,
)
.await?;

// Collects result into metrics
let metrics = result.into();
self.volatile_ctx.inflight_subprocedures.clear();
self.volatile_ctx.metrics += metrics;
}

Ok(())
}

/// Returns the immutable metrics.
pub(crate) fn metrics(&self) -> &ReconcileDatabaseMetrics {
&self.volatile_ctx.metrics
}
}

#[derive(Debug, Serialize, Deserialize)]
@@ -114,7 +125,6 @@ impl PersistentContext {
}
}

#[derive(Default)]
pub(crate) struct VolatileContext {
/// Stores pending physical tables.
pending_tables: Vec<(TableId, TableName)>,
@@ -124,9 +134,26 @@ pub(crate) struct VolatileContext {
/// - Value: Vector of (TableId, TableName) tuples representing logical tables belonging to the physical table.
pending_logical_tables: HashMap<TableId, Vec<(TableId, TableName)>>,
/// Stores inflight subprocedures.
inflight_subprocedures: Vec<ProcedureId>,
inflight_subprocedures: Vec<SubprocedureMeta>,
/// Stores the stream of tables.
tables: Option<BoxStream<'static, Result<(String, TableNameValue)>>>,
/// The metrics of reconciling database.
metrics: ReconcileDatabaseMetrics,
/// The start time of the reconciliation.
start_time: Instant,
}

impl Default for VolatileContext {
fn default() -> Self {
Self {
pending_tables: vec![],
pending_logical_tables: HashMap::new(),
inflight_subprocedures: vec![],
tables: None,
metrics: ReconcileDatabaseMetrics::default(),
start_time: Instant::now(),
}
}
}

pub struct ReconcileDatabaseProcedure {
@@ -190,6 +217,11 @@ impl Procedure for ReconcileDatabaseProcedure {
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
let state = &mut self.state;

let procedure_name = Self::TYPE_NAME;
let step = state.name();
let _timer = metrics::METRIC_META_RECONCILIATION_PROCEDURE
.with_label_values(&[procedure_name, step])
.start_timer();
match state.next(&mut self.context, _ctx).await {
Ok((next, status)) => {
*state = next;
@@ -197,8 +229,14 @@ impl Procedure for ReconcileDatabaseProcedure {
}
Err(e) => {
if e.is_retry_later() {
metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
.with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_RETRYABLE])
.inc();
Err(ProcedureError::retry_later(e))
} else {
metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
.with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_EXTERNAL])
.inc();
Err(ProcedureError::external(e))
}
}

@@ -15,6 +15,7 @@
use std::any::Any;

use common_procedure::{Context as ProcedureContext, Status};
use common_telemetry::info;
use serde::{Deserialize, Serialize};

use crate::error::Result;
@@ -28,9 +29,17 @@ pub(crate) struct ReconcileDatabaseEnd;
impl State for ReconcileDatabaseEnd {
async fn next(
&mut self,
_ctx: &mut ReconcileDatabaseContext,
_procedure_ctx: &ProcedureContext,
ctx: &mut ReconcileDatabaseContext,
procedure_ctx: &ProcedureContext,
) -> Result<(Box<dyn State>, Status)> {
info!(
"Database reconciliation completed. schema: {}, catalog: {}, procedure_id: {}, metrics: {}, elapsed: {:?}",
ctx.persistent_ctx.schema,
ctx.persistent_ctx.catalog,
procedure_ctx.procedure_id,
ctx.metrics(),
ctx.volatile_ctx.start_time.elapsed(),
);
Ok((Box::new(ReconcileDatabaseEnd), Status::done()))
}
@@ -29,7 +29,7 @@ use crate::key::table_route::TableRouteValue;
use crate::reconciliation::reconcile_database::end::ReconcileDatabaseEnd;
use crate::reconciliation::reconcile_database::{ReconcileDatabaseContext, State};
use crate::reconciliation::reconcile_logical_tables::ReconcileLogicalTablesProcedure;
use crate::reconciliation::utils::Context;
use crate::reconciliation::utils::{Context, SubprocedureMeta};

#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct ReconcileLogicalTables;
@@ -128,13 +128,12 @@ impl State for ReconcileLogicalTables {
impl ReconcileLogicalTables {
fn schedule_reconcile_logical_tables(
ctx: &mut ReconcileDatabaseContext,
buffer: &mut Vec<ProcedureWithId>,
buffer: &mut Vec<(ProcedureWithId, SubprocedureMeta)>,
) -> Result<(Box<dyn State>, Status)> {
let procedures = std::mem::take(buffer);
ctx.volatile_ctx
.inflight_subprocedures
.extend(procedures.iter().map(|p| p.id));
let buffer = std::mem::take(buffer);
let (procedures, meta): (Vec<_>, Vec<_>) = buffer.into_iter().unzip();

ctx.volatile_ctx.inflight_subprocedures.extend(meta);
Ok((
Box::new(ReconcileLogicalTables),
Status::suspended(procedures, false),
@@ -142,7 +141,7 @@ impl ReconcileLogicalTables {
}

fn should_schedule_reconcile_logical_tables(
buffer: &[ProcedureWithId],
buffer: &[(ProcedureWithId, SubprocedureMeta)],
parallelism: usize,
) -> bool {
buffer.len() >= parallelism
@@ -152,7 +151,7 @@ impl ReconcileLogicalTables {
ctx: &Context,
pending_logical_tables: &mut HashMap<TableId, Vec<(TableId, TableName)>>,
parallelism: usize,
) -> Result<Option<ProcedureWithId>> {
) -> Result<Option<(ProcedureWithId, SubprocedureMeta)>> {
let mut physical_table_id = None;
for (table_id, tables) in pending_logical_tables.iter() {
if tables.len() >= parallelism {
@@ -176,7 +175,7 @@ impl ReconcileLogicalTables {
async fn build_remaining_procedures(
ctx: &Context,
pending_logical_tables: &mut HashMap<TableId, Vec<(TableId, TableName)>>,
pending_procedures: &mut Vec<ProcedureWithId>,
pending_procedures: &mut Vec<(ProcedureWithId, SubprocedureMeta)>,
parallelism: usize,
) -> Result<()> {
if pending_logical_tables.is_empty() {
@@ -203,7 +202,7 @@ impl ReconcileLogicalTables {
ctx: &Context,
physical_table_id: TableId,
logical_tables: Vec<(TableId, TableName)>,
) -> Result<ProcedureWithId> {
) -> Result<(ProcedureWithId, SubprocedureMeta)> {
let table_info = ctx
.table_metadata_manager
.table_info_manager()
@@ -217,12 +216,18 @@ impl ReconcileLogicalTables {
let procedure = ReconcileLogicalTablesProcedure::new(
ctx.clone(),
physical_table_id,
physical_table_name,
logical_tables,
physical_table_name.clone(),
logical_tables.clone(),
true,
);

Ok(ProcedureWithId::with_random_id(Box::new(procedure)))
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
let subprocedure_meta = SubprocedureMeta::new_logical_table(
procedure_with_id.id,
physical_table_id,
physical_table_name,
logical_tables,
);
Ok((procedure_with_id, subprocedure_meta))
}

fn enqueue_logical_table(
@@ -27,7 +27,7 @@ use crate::key::table_route::TableRouteValue;
use crate::reconciliation::reconcile_database::reconcile_logical_tables::ReconcileLogicalTables;
use crate::reconciliation::reconcile_database::{ReconcileDatabaseContext, State};
use crate::reconciliation::reconcile_table::ReconcileTableProcedure;
use crate::reconciliation::utils::Context;
use crate::reconciliation::utils::{Context, SubprocedureMeta};

#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct ReconcileTables;
@@ -104,14 +104,14 @@ impl ReconcileTables {
ctx: &mut ReconcileDatabaseContext,
) -> Result<(Box<dyn State>, Status)> {
let tables = std::mem::take(&mut ctx.volatile_ctx.pending_tables);
let subprocedures = Self::build_reconcile_table_procedures(ctx, tables);
ctx.volatile_ctx
.inflight_subprocedures
.extend(subprocedures.iter().map(|p| p.id));

let (procedures, meta): (Vec<_>, Vec<_>) =
Self::build_reconcile_table_procedures(ctx, tables)
.into_iter()
.unzip();
ctx.volatile_ctx.inflight_subprocedures.extend(meta);
Ok((
Box::new(ReconcileTables),
Status::suspended(subprocedures, false),
Status::suspended(procedures, false),
))
}

@@ -125,7 +125,7 @@ impl ReconcileTables {
fn build_reconcile_table_procedures(
ctx: &ReconcileDatabaseContext,
tables: Vec<(TableId, TableName)>,
) -> Vec<ProcedureWithId> {
) -> Vec<(ProcedureWithId, SubprocedureMeta)> {
let mut procedures = Vec::with_capacity(tables.len());
for (table_id, table_name) in tables {
let context = Context {
@@ -141,11 +141,13 @@ impl ReconcileTables {
true,
);
let procedure = ProcedureWithId::with_random_id(Box::new(procedure));
let meta =
SubprocedureMeta::new_physical_table(procedure.id, table_id, table_name.clone());
info!(
"Reconcile table: {}, table_id: {}, procedure_id: {}",
table_name, table_id, procedure.id
);
procedures.push(procedure)
procedures.push((procedure, meta));
}

procedures

@@ -33,7 +33,7 @@ impl State for ReconcileDatabaseStart {
async fn next(
&mut self,
ctx: &mut ReconcileDatabaseContext,
_procedure_ctx: &ProcedureContext,
procedure_ctx: &ProcedureContext,
) -> Result<(Box<dyn State>, Status)> {
let exists = ctx
.table_metadata_manager
@@ -51,8 +51,8 @@ impl State for ReconcileDatabaseStart {
},
);
info!(
"Reconcile database: {}, catalog: {}",
ctx.persistent_ctx.schema, ctx.persistent_ctx.catalog
"Reconcile database: {}, catalog: {}, procedure_id: {}",
ctx.persistent_ctx.schema, ctx.persistent_ctx.catalog, procedure_ctx.procedure_id,
);
Ok((Box::new(ReconcileTables), Status::executing(true)))
}
@@ -1,79 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_procedure::{watcher, Context as ProcedureContext, ProcedureId};
use common_telemetry::{error, info, warn};
use futures::future::{join_all, try_join_all};
use snafu::{OptionExt, ResultExt};

use crate::error::{
ProcedureStateReceiverNotFoundSnafu, ProcedureStateReceiverSnafu, Result, WaitProcedureSnafu,
};

/// Wait for inflight subprocedures.
///
/// If `fail_fast` is true, the function will return an error if any subprocedure fails.
/// Otherwise, the function will continue waiting for all subprocedures to complete.
pub(crate) async fn wait_for_inflight_subprocedures(
procedure_ctx: &ProcedureContext,
subprocedures: &[ProcedureId],
fail_fast: bool,
) -> Result<()> {
let mut receivers = Vec::with_capacity(subprocedures.len());
for procedure_id in subprocedures {
let receiver = procedure_ctx
.provider
.procedure_state_receiver(*procedure_id)
.await
.context(ProcedureStateReceiverSnafu {
procedure_id: *procedure_id,
})?
.context(ProcedureStateReceiverNotFoundSnafu {
procedure_id: *procedure_id,
})?;
receivers.push(receiver);
}

let mut tasks = Vec::with_capacity(receivers.len());
for receiver in receivers.iter_mut() {
let fut = watcher::wait(receiver);
tasks.push(fut);
}

if fail_fast {
try_join_all(tasks).await.context(WaitProcedureSnafu)?;
} else {
let mut failed = 0;
let total = tasks.len();
for result in join_all(tasks).await {
if let Err(e) = result {
error!(e; "inflight subprocedure, procedure_id: {}", procedure_ctx.procedure_id);
failed += 1;
}
}
if failed > 0 {
warn!(
"{} inflight subprocedures failed, total: {}, procedure_id: {}",
failed, total, procedure_ctx.procedure_id
);
} else {
info!(
"{} inflight subprocedures completed, procedure_id: {}",
total, procedure_ctx.procedure_id
);
}
}

Ok(())
}
@@ -40,15 +40,17 @@ use crate::key::table_info::TableInfoValue;
use crate::key::table_route::PhysicalTableRouteValue;
use crate::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::metrics;
use crate::node_manager::NodeManagerRef;
use crate::reconciliation::reconcile_logical_tables::reconciliation_start::ReconciliationStart;
use crate::reconciliation::utils::Context;
use crate::reconciliation::utils::{Context, ReconcileLogicalTableMetrics};

pub struct ReconcileLogicalTablesContext {
pub node_manager: NodeManagerRef,
pub table_metadata_manager: TableMetadataManagerRef,
pub cache_invalidator: CacheInvalidatorRef,
pub persistent_ctx: PersistentContext,
pub volatile_ctx: VolatileContext,
}

impl ReconcileLogicalTablesContext {
@@ -59,16 +61,29 @@ impl ReconcileLogicalTablesContext {
table_metadata_manager: ctx.table_metadata_manager,
cache_invalidator: ctx.cache_invalidator,
persistent_ctx,
volatile_ctx: VolatileContext::default(),
}
}

/// Returns the physical table name.
pub(crate) fn table_name(&self) -> &TableName {
&self.persistent_ctx.table_name
}

/// Returns the physical table id.
pub(crate) fn table_id(&self) -> TableId {
self.persistent_ctx.table_id
}

/// Returns a mutable reference to the metrics.
pub(crate) fn mut_metrics(&mut self) -> &mut ReconcileLogicalTableMetrics {
&mut self.volatile_ctx.metrics
}

/// Returns a reference to the metrics.
pub(crate) fn metrics(&self) -> &ReconcileLogicalTableMetrics {
&self.volatile_ctx.metrics
}
}

#[derive(Debug, Serialize, Deserialize)]
@@ -120,6 +135,11 @@ impl PersistentContext {
}
}

#[derive(Default)]
pub(crate) struct VolatileContext {
pub(crate) metrics: ReconcileLogicalTableMetrics,
}

pub struct ReconcileLogicalTablesProcedure {
pub context: ReconcileLogicalTablesContext,
state: Box<dyn State>,
@@ -173,6 +193,11 @@ impl Procedure for ReconcileLogicalTablesProcedure {
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
let state = &mut self.state;

let procedure_name = Self::TYPE_NAME;
let step = state.name();
let _timer = metrics::METRIC_META_RECONCILIATION_PROCEDURE
.with_label_values(&[procedure_name, step])
.start_timer();
match state.next(&mut self.context, _ctx).await {
Ok((next, status)) => {
*state = next;
@@ -180,8 +205,14 @@ impl Procedure for ReconcileLogicalTablesProcedure {
}
Err(e) => {
if e.is_retry_later() {
metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
.with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_RETRYABLE])
.inc();
Err(ProcedureError::retry_later(e))
} else {
metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
.with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_EXTERNAL])
.inc();
Err(ProcedureError::external(e))
}
}

@@ -15,6 +15,7 @@
use std::any::Any;

use common_procedure::{Context as ProcedureContext, Status};
use common_telemetry::info;
use serde::{Deserialize, Serialize};

use crate::error::Result;
@@ -28,9 +29,21 @@ pub struct ReconciliationEnd;
impl State for ReconciliationEnd {
async fn next(
&mut self,
_ctx: &mut ReconcileLogicalTablesContext,
_procedure_ctx: &ProcedureContext,
ctx: &mut ReconcileLogicalTablesContext,
procedure_ctx: &ProcedureContext,
) -> Result<(Box<dyn State>, Status)> {
let table_id = ctx.table_id();
let table_name = ctx.table_name();
let metrics = ctx.metrics();

info!(
"Logical tables reconciliation completed. logical tables: {:?}, physical_table_id: {}, table_name: {}, procedure_id: {}, metrics: {}",
ctx.persistent_ctx.logical_table_ids,
table_id,
table_name,
procedure_ctx.procedure_id,
metrics
);
Ok((Box::new(ReconciliationEnd), Status::done()))
}
@@ -25,8 +25,11 @@ use crate::ddl::utils::region_metadata_lister::RegionMetadataLister;
use crate::ddl::utils::table_id::get_all_table_ids_by_names;
use crate::ddl::utils::table_info::all_logical_table_routes_have_same_physical_id;
use crate::error::{self, Result};
use crate::metrics;
use crate::reconciliation::reconcile_logical_tables::resolve_table_metadatas::ResolveTableMetadatas;
use crate::reconciliation::reconcile_logical_tables::{ReconcileLogicalTablesContext, State};
use crate::reconciliation::reconcile_logical_tables::{
ReconcileLogicalTablesContext, ReconcileLogicalTablesProcedure, State,
};
use crate::reconciliation::utils::check_column_metadatas_consistent;

/// The start state of the reconciliation procedure.
@@ -39,7 +42,7 @@ impl State for ReconciliationStart {
async fn next(
&mut self,
ctx: &mut ReconcileLogicalTablesContext,
_procedure_ctx: &ProcedureContext,
procedure_ctx: &ProcedureContext,
) -> Result<(Box<dyn State>, Status)> {
let table_id = ctx.table_id();
let table_name = ctx.table_name();
@@ -58,35 +61,48 @@ impl State for ReconciliationStart {
}
);

info!(
"Starting reconciliation for logical table: table_id: {}, table_name: {}",
table_id, table_name
);

let region_metadata_lister = RegionMetadataLister::new(ctx.node_manager.clone());
let region_metadatas = region_metadata_lister
.list(physical_table_id, &physical_table_route.region_routes)
.await?;
let region_metadatas = {
let _timer = metrics::METRIC_META_RECONCILIATION_LIST_REGION_METADATA_DURATION
.with_label_values(&[metrics::TABLE_TYPE_PHYSICAL])
.start_timer();
region_metadata_lister
.list(physical_table_id, &physical_table_route.region_routes)
.await?
};

ensure!(!region_metadatas.is_empty(), {
metrics::METRIC_META_RECONCILIATION_STATS
.with_label_values(&[
ReconcileLogicalTablesProcedure::TYPE_NAME,
metrics::TABLE_TYPE_PHYSICAL,
metrics::STATS_TYPE_NO_REGION_METADATA,
])
.inc();

ensure!(
!region_metadatas.is_empty(),
error::UnexpectedSnafu {
err_msg: format!(
"No region metadata found for table: {}, table_id: {}",
"No region metadata found for physical table: {}, table_id: {}",
table_name, table_id
),
}
);
});

if region_metadatas.iter().any(|r| r.is_none()) {
return error::UnexpectedSnafu {
ensure!(region_metadatas.iter().all(|r| r.is_some()), {
metrics::METRIC_META_RECONCILIATION_STATS
.with_label_values(&[
ReconcileLogicalTablesProcedure::TYPE_NAME,
metrics::TABLE_TYPE_PHYSICAL,
metrics::STATS_TYPE_REGION_NOT_OPEN,
])
.inc();
error::UnexpectedSnafu {
err_msg: format!(
"Some regions of the physical table are not open. Table: {}, table_id: {}",
"Some regions of the physical table are not open. physical table: {}, table_id: {}",
table_name, table_id
),
}
.fail();
}
});

// Safety: checked above
let region_metadatas = region_metadatas
@@ -96,14 +112,13 @@ impl State for ReconciliationStart {
let _region_metadata = check_column_metadatas_consistent(&region_metadatas).context(
error::UnexpectedSnafu {
err_msg: format!(
"Column metadatas are not consistent for table: {}, table_id: {}",
"Column metadatas are not consistent for physical table: {}, table_id: {}",
table_name, table_id
),
},
)?;

// TODO(weny): ensure all columns in region metadata can be found in table info.

// Validates the logical tables.
Self::validate_schema(&ctx.persistent_ctx.logical_tables)?;
let table_refs = ctx
@@ -119,6 +134,12 @@ impl State for ReconciliationStart {
.await?;
Self::validate_logical_table_routes(ctx, &table_ids).await?;

let table_name = ctx.table_name();
info!(
"Starting reconciliation for logical tables: {:?}, physical_table_id: {}, table_name: {}, procedure_id: {}",
table_ids, table_id, table_name, procedure_ctx.procedure_id
);

ctx.persistent_ctx.physical_table_route = Some(physical_table_route);
ctx.persistent_ctx.logical_table_ids = table_ids;
Ok((Box::new(ResolveTableMetadatas), Status::executing(true)))

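// Illustrative sketch (not part of the diff above): the new hunks rely on the fact that
// snafu's `ensure!` only evaluates its second argument when the predicate is false, so a
// block that bumps a counter and then yields the context selector records the failure
// exactly once, on the failure path only. The static counter and error type below are
// stand-ins, not the real METRIC_META_RECONCILIATION_STATS / error::UnexpectedSnafu.
use std::sync::atomic::{AtomicU64, Ordering};

use snafu::{ensure, Snafu};

static NO_REGION_METADATA_FAILURES: AtomicU64 = AtomicU64::new(0);

#[derive(Debug, Snafu)]
#[snafu(display("unexpected: {err_msg}"))]
struct UnexpectedError {
    err_msg: String,
}

fn check_region_metadatas(region_metadatas: &[Option<u32>]) -> Result<(), UnexpectedError> {
    // The block is skipped entirely when the predicate holds, so the counter only moves
    // when the check fails, mirroring the metric bump in the reconciliation states.
    ensure!(!region_metadatas.is_empty(), {
        NO_REGION_METADATA_FAILURES.fetch_add(1, Ordering::Relaxed);
        UnexpectedSnafu {
            err_msg: "No region metadata found".to_string(),
        }
    });
    Ok(())
}
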
@@ -22,8 +22,11 @@ use snafu::ensure;
use crate::ddl::utils::region_metadata_lister::RegionMetadataLister;
use crate::ddl::utils::table_info::get_all_table_info_values_by_table_ids;
use crate::error::{self, Result};
use crate::metrics;
use crate::reconciliation::reconcile_logical_tables::reconcile_regions::ReconcileRegions;
use crate::reconciliation::reconcile_logical_tables::{ReconcileLogicalTablesContext, State};
use crate::reconciliation::reconcile_logical_tables::{
ReconcileLogicalTablesContext, ReconcileLogicalTablesProcedure, State,
};
use crate::reconciliation::utils::{
check_column_metadatas_consistent, need_update_logical_table_info,
};
@@ -65,22 +68,38 @@ impl State for ResolveTableMetadatas {
.unwrap()
.region_routes;
let region_metadata_lister = RegionMetadataLister::new(ctx.node_manager.clone());
let mut metadata_consistent_count = 0;
let mut metadata_inconsistent_count = 0;
let mut create_tables_count = 0;
for (table_id, table_info_value) in table_ids.iter().zip(table_info_values.iter()) {
let region_metadatas = region_metadata_lister
.list(*table_id, region_routes)
.await?;
let region_metadatas = {
let _timer = metrics::METRIC_META_RECONCILIATION_LIST_REGION_METADATA_DURATION
.with_label_values(&[metrics::TABLE_TYPE_LOGICAL])
.start_timer();
region_metadata_lister
.list(*table_id, region_routes)
.await?
};

ensure!(!region_metadatas.is_empty(), {
metrics::METRIC_META_RECONCILIATION_STATS
.with_label_values(&[
ReconcileLogicalTablesProcedure::TYPE_NAME,
metrics::TABLE_TYPE_LOGICAL,
metrics::STATS_TYPE_NO_REGION_METADATA,
])
.inc();

ensure!(
!region_metadatas.is_empty(),
error::UnexpectedSnafu {
err_msg: format!(
"No region metadata found for table: {}, table_id: {}",
table_info_value.table_info.name, table_id
),
}
);
});

if region_metadatas.iter().any(|r| r.is_none()) {
create_tables_count += 1;
create_tables.push((*table_id, table_info_value.table_info.clone()));
continue;
}
@@ -91,10 +110,12 @@ impl State for ResolveTableMetadatas {
.map(|r| r.unwrap())
.collect::<Vec<_>>();
if let Some(column_metadatas) = check_column_metadatas_consistent(&region_metadatas) {
metadata_consistent_count += 1;
if need_update_logical_table_info(&table_info_value.table_info, &column_metadatas) {
update_table_infos.push((*table_id, column_metadatas));
}
} else {
metadata_inconsistent_count += 1;
// If the logical regions have inconsistent column metadatas, it won't affect read and write.
// It's safe to continue if the column metadatas of the logical table are inconsistent.
warn!(
@@ -121,6 +142,11 @@ impl State for ResolveTableMetadatas {
);
ctx.persistent_ctx.update_table_infos = update_table_infos;
ctx.persistent_ctx.create_tables = create_tables;
// Update metrics.
let metrics = ctx.mut_metrics();
metrics.column_metadata_consistent_count = metadata_consistent_count;
metrics.column_metadata_inconsistent_count = metadata_inconsistent_count;
metrics.create_tables_count = create_tables_count;
Ok((Box::new(ReconcileRegions), Status::executing(true)))
}

@@ -96,6 +96,7 @@ impl State for UpdateTableInfos {
let table_id = ctx.table_id();
let table_name = ctx.table_name();

let updated_table_info_num = table_info_values_to_update.len();
batch_update_table_info_values(&ctx.table_metadata_manager, table_info_values_to_update)
.await?;

@@ -122,6 +123,9 @@ impl State for UpdateTableInfos {
.await?;

ctx.persistent_ctx.update_table_infos.clear();
// Update metrics.
let metrics = ctx.mut_metrics();
metrics.update_table_info_count = updated_table_info_num;
Ok((Box::new(ReconciliationEnd), Status::executing(false)))
}

@@ -40,10 +40,13 @@ use crate::key::table_info::TableInfoValue;
use crate::key::table_route::PhysicalTableRouteValue;
use crate::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
use crate::lock_key::{CatalogLock, SchemaLock, TableNameLock};
use crate::metrics;
use crate::node_manager::NodeManagerRef;
use crate::reconciliation::reconcile_table::reconciliation_start::ReconciliationStart;
use crate::reconciliation::reconcile_table::resolve_column_metadata::ResolveStrategy;
use crate::reconciliation::utils::{build_table_meta_from_column_metadatas, Context};
use crate::reconciliation::utils::{
build_table_meta_from_column_metadatas, Context, ReconcileTableMetrics,
};

pub struct ReconcileTableContext {
pub node_manager: NodeManagerRef,
@@ -65,13 +68,46 @@ impl ReconcileTableContext {
}
}

/// Returns the physical table name.
pub(crate) fn table_name(&self) -> &TableName {
&self.persistent_ctx.table_name
}

/// Returns the physical table id.
pub(crate) fn table_id(&self) -> TableId {
self.persistent_ctx.table_id
}

/// Builds a [`RawTableMeta`] from the provided [`ColumnMetadata`]s.
pub(crate) fn build_table_meta(
&self,
column_metadatas: &[ColumnMetadata],
) -> Result<RawTableMeta> {
// Safety: The table info value is set in `ReconciliationStart` state.
let table_info_value = self.persistent_ctx.table_info_value.as_ref().unwrap();
let table_id = self.table_id();
let table_ref = self.table_name().table_ref();
let name_to_ids = table_info_value.table_info.name_to_ids();
let table_meta = build_table_meta_from_column_metadatas(
table_id,
table_ref,
&table_info_value.table_info.meta,
name_to_ids,
column_metadatas,
)?;

Ok(table_meta)
}

/// Returns a mutable reference to the metrics.
pub(crate) fn mut_metrics(&mut self) -> &mut ReconcileTableMetrics {
&mut self.volatile_ctx.metrics
}

/// Returns a reference to the metrics.
pub(crate) fn metrics(&self) -> &ReconcileTableMetrics {
&self.volatile_ctx.metrics
}
}

#[derive(Debug, Serialize, Deserialize)]
@@ -110,29 +146,7 @@ impl PersistentContext {
#[derive(Default)]
pub(crate) struct VolatileContext {
pub(crate) table_meta: Option<RawTableMeta>,
}

impl ReconcileTableContext {
/// Builds a [`RawTableMeta`] from the provided [`ColumnMetadata`]s.
pub(crate) fn build_table_meta(
&self,
column_metadatas: &[ColumnMetadata],
) -> Result<RawTableMeta> {
// Safety: The table info value is set in `ReconciliationStart` state.
let table_info_value = self.persistent_ctx.table_info_value.as_ref().unwrap();
let table_id = self.table_id();
let table_ref = self.table_name().table_ref();
let name_to_ids = table_info_value.table_info.name_to_ids();
let table_meta = build_table_meta_from_column_metadatas(
table_id,
table_ref,
&table_info_value.table_info.meta,
name_to_ids,
column_metadatas,
)?;

Ok(table_meta)
}
pub(crate) metrics: ReconcileTableMetrics,
}

pub struct ReconcileTableProcedure {
@@ -191,6 +205,11 @@ impl Procedure for ReconcileTableProcedure {
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
let state = &mut self.state;

let procedure_name = Self::TYPE_NAME;
let step = state.name();
let _timer = metrics::METRIC_META_RECONCILIATION_PROCEDURE
.with_label_values(&[procedure_name, step])
.start_timer();
match state.next(&mut self.context, _ctx).await {
Ok((next, status)) => {
*state = next;
@@ -198,8 +217,14 @@ impl Procedure for ReconcileTableProcedure {
}
Err(e) => {
if e.is_retry_later() {
metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
.with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_RETRYABLE])
.inc();
Err(ProcedureError::retry_later(e))
} else {
metrics::METRIC_META_RECONCILIATION_PROCEDURE_ERROR
.with_label_values(&[procedure_name, step, metrics::ERROR_TYPE_EXTERNAL])
.inc();
Err(ProcedureError::external(e))
}
}

@@ -15,6 +15,7 @@
use std::any::Any;

use common_procedure::{Context as ProcedureContext, Status};
use common_telemetry::info;
use serde::{Deserialize, Serialize};
use tonic::async_trait;

@@ -31,9 +32,18 @@ pub struct ReconciliationEnd;
impl State for ReconciliationEnd {
async fn next(
&mut self,
_ctx: &mut ReconcileTableContext,
_procedure_ctx: &ProcedureContext,
ctx: &mut ReconcileTableContext,
procedure_ctx: &ProcedureContext,
) -> Result<(Box<dyn State>, Status)> {
let table_id = ctx.table_id();
let table_name = ctx.table_name();
let metrics = ctx.metrics();

info!(
"Physical table reconciliation completed. table_name: {}, table_id: {}, procedure_id: {}, metrics: {}",
table_name, table_id, procedure_ctx.procedure_id, metrics
);

Ok((Box::new(ReconciliationEnd), Status::done()))
}

@@ -20,9 +20,12 @@ use serde::{Deserialize, Serialize};
use snafu::ensure;

use crate::ddl::utils::region_metadata_lister::RegionMetadataLister;
use crate::error::{self, Result, UnexpectedSnafu};
use crate::error::{self, Result};
use crate::metrics::{self};
use crate::reconciliation::reconcile_table::resolve_column_metadata::ResolveColumnMetadata;
use crate::reconciliation::reconcile_table::{ReconcileTableContext, State};
use crate::reconciliation::reconcile_table::{
ReconcileTableContext, ReconcileTableProcedure, State,
};

/// The start state of the reconciliation procedure.
///
@@ -40,7 +43,7 @@ impl State for ReconciliationStart {
async fn next(
&mut self,
ctx: &mut ReconcileTableContext,
_procedure_ctx: &ProcedureContext,
procedure_ctx: &ProcedureContext,
) -> Result<(Box<dyn State>, Status)> {
let table_id = ctx.table_id();
let table_name = ctx.table_name();
@@ -60,33 +63,56 @@ impl State for ReconciliationStart {
}
);

info!("Reconciling table: {}, table_id: {}", table_name, table_id);
info!(
"Reconciling table: {}, table_id: {}, procedure_id: {}",
table_name, table_id, procedure_ctx.procedure_id
);
// TODO(weny): Repairs the table route if needed.
let region_metadata_lister = RegionMetadataLister::new(ctx.node_manager.clone());
// Always list region metadatas for the physical table.
let region_metadatas = region_metadata_lister
.list(physical_table_id, &physical_table_route.region_routes)
.await?;

ensure!(
!region_metadatas.is_empty(),
let region_metadatas = {
let _timer = metrics::METRIC_META_RECONCILIATION_LIST_REGION_METADATA_DURATION
.with_label_values(&[metrics::TABLE_TYPE_PHYSICAL])
.start_timer();
// Always list region metadatas for the physical table.
region_metadata_lister
.list(physical_table_id, &physical_table_route.region_routes)
.await?
};

ensure!(!region_metadatas.is_empty(), {
metrics::METRIC_META_RECONCILIATION_STATS
.with_label_values(&[
ReconcileTableProcedure::TYPE_NAME,
metrics::TABLE_TYPE_PHYSICAL,
metrics::STATS_TYPE_NO_REGION_METADATA,
])
.inc();

error::UnexpectedSnafu {
err_msg: format!(
"No region metadata found for table: {}, table_id: {}",
table_name, table_id
),
}
);
});

if region_metadatas.iter().any(|r| r.is_none()) {
return UnexpectedSnafu {
ensure!(region_metadatas.iter().all(|r| r.is_some()), {
metrics::METRIC_META_RECONCILIATION_STATS
.with_label_values(&[
ReconcileTableProcedure::TYPE_NAME,
metrics::TABLE_TYPE_PHYSICAL,
metrics::STATS_TYPE_REGION_NOT_OPEN,
])
.inc();

error::UnexpectedSnafu {
err_msg: format!(
"Some regions are not opened, table: {}, table_id: {}",
table_name, table_id
),
}
.fail();
}
});

// Persist the physical table route.
// TODO(weny): refetch the physical table route if repair is needed.

@@ -20,6 +20,7 @@ use common_telemetry::info;
use serde::{Deserialize, Serialize};
use snafu::OptionExt;
use store_api::metadata::RegionMetadata;
use strum::AsRefStr;

use crate::error::{self, MissingColumnIdsSnafu, Result};
use crate::reconciliation::reconcile_table::reconcile_regions::ReconcileRegions;
@@ -28,10 +29,11 @@ use crate::reconciliation::reconcile_table::{ReconcileTableContext, State};
use crate::reconciliation::utils::{
build_column_metadata_from_table_info, check_column_metadatas_consistent,
resolve_column_metadatas_with_latest, resolve_column_metadatas_with_metasrv,
ResolveColumnMetadataResult,
};

/// Strategy for resolving column metadata inconsistencies.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default)]
#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default, AsRefStr)]
pub enum ResolveStrategy {
#[default]
/// Trusts the latest column metadata from datanode.
@@ -98,6 +100,10 @@ impl State for ResolveColumnMetadata {
"Column metadatas are consistent for table: {}, table_id: {}.",
table_name, table_id
);

// Update metrics.
ctx.mut_metrics().resolve_column_metadata_result =
Some(ResolveColumnMetadataResult::Consistent);
return Ok((
Box::new(UpdateTableInfo::new(table_info_value, column_metadatas)),
Status::executing(false),
@@ -119,6 +125,11 @@ impl State for ResolveColumnMetadata {

let region_ids =
resolve_column_metadatas_with_metasrv(&column_metadata, &self.region_metadata)?;

// Update metrics.
let metrics = ctx.mut_metrics();
metrics.resolve_column_metadata_result =
Some(ResolveColumnMetadataResult::Inconsistent(self.strategy));
Ok((
Box::new(ReconcileRegions::new(column_metadata, region_ids)),
Status::executing(true),
@@ -127,16 +138,29 @@ impl State for ResolveColumnMetadata {
ResolveStrategy::UseLatest => {
let (column_metadatas, region_ids) =
resolve_column_metadatas_with_latest(&self.region_metadata)?;

// Update metrics.
let metrics = ctx.mut_metrics();
metrics.resolve_column_metadata_result =
Some(ResolveColumnMetadataResult::Inconsistent(self.strategy));
Ok((
Box::new(ReconcileRegions::new(column_metadatas, region_ids)),
Status::executing(true),
))
}
ResolveStrategy::AbortOnConflict => error::ColumnMetadataConflictsSnafu {
table_name: table_name.to_string(),
table_id,
ResolveStrategy::AbortOnConflict => {
let table_name = table_name.to_string();

// Update metrics.
let metrics = ctx.mut_metrics();
metrics.resolve_column_metadata_result =
Some(ResolveColumnMetadataResult::Inconsistent(self.strategy));
error::ColumnMetadataConflictsSnafu {
table_name,
table_id,
}
.fail()
}
.fail(),
}
}

@@ -116,6 +116,9 @@ impl State for UpdateTableInfo {
],
)
.await?;
// Update metrics.
let metrics = ctx.mut_metrics();
metrics.update_table_info = true;

Ok((Box::new(ReconciliationEnd), Status::executing(true)))
}

@@ -13,23 +13,35 @@
// limitations under the License.

use std::collections::{HashMap, HashSet};
use std::fmt;
use std::fmt::{self, Display};
use std::ops::AddAssign;
use std::time::Instant;

use api::v1::SemanticType;
use common_telemetry::warn;
use common_procedure::{watcher, Context as ProcedureContext, ProcedureId};
use common_telemetry::{error, warn};
use datatypes::schema::ColumnSchema;
use snafu::{ensure, OptionExt};
use futures::future::{join_all, try_join_all};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::metadata::{ColumnMetadata, RegionMetadata};
use store_api::storage::{RegionId, TableId};
use table::metadata::{RawTableInfo, RawTableMeta};
use table::table_name::TableName;
use table::table_reference::TableReference;

use crate::cache_invalidator::CacheInvalidatorRef;
use crate::error::{
MismatchColumnIdSnafu, MissingColumnInColumnMetadataSnafu, Result, UnexpectedSnafu,
ColumnIdMismatchSnafu, ColumnNotFoundSnafu, MismatchColumnIdSnafu,
MissingColumnInColumnMetadataSnafu, ProcedureStateReceiverNotFoundSnafu,
ProcedureStateReceiverSnafu, Result, TimestampMismatchSnafu, UnexpectedSnafu,
WaitProcedureSnafu,
};
use crate::key::TableMetadataManagerRef;
use crate::metrics;
use crate::node_manager::NodeManagerRef;
use crate::reconciliation::reconcile_logical_tables::ReconcileLogicalTablesProcedure;
use crate::reconciliation::reconcile_table::resolve_column_metadata::ResolveStrategy;
use crate::reconciliation::reconcile_table::ReconcileTableProcedure;

#[derive(Debug, PartialEq, Eq)]
pub(crate) struct PartialRegionMetadata<'a> {
@@ -48,20 +60,6 @@ impl<'a> From<&'a RegionMetadata> for PartialRegionMetadata<'a> {
}
}

/// A display wrapper for [`ColumnMetadata`] that formats the column metadata in a more readable way.
struct ColumnMetadataDisplay<'a>(pub &'a ColumnMetadata);

impl<'a> fmt::Debug for ColumnMetadataDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let col = self.0;
write!(
f,
"Column {{ name: {}, id: {}, semantic_type: {:?}, data_type: {:?} }}",
col.column_schema.name, col.column_id, col.semantic_type, col.column_schema.data_type,
)
}
}

/// Checks if the column metadatas are consistent.
///
/// The column metadatas are consistent if:
@@ -110,21 +108,7 @@ pub(crate) fn resolve_column_metadatas_with_metasrv(
let mut regions_ids = vec![];
for region_metadata in region_metadatas {
if region_metadata.column_metadatas != column_metadatas {
let is_invariant_preserved = check_column_metadata_invariants(
column_metadatas,
&region_metadata.column_metadatas,
);
ensure!(
is_invariant_preserved,
UnexpectedSnafu {
err_msg: format!(
"Column metadata invariants violated for region {}. Resolved column metadata: {:?}, region column metadata: {:?}",
region_metadata.region_id,
column_metadatas.iter().map(ColumnMetadataDisplay).collect::<Vec<_>>(),
region_metadata.column_metadatas.iter().map(ColumnMetadataDisplay).collect::<Vec<_>>(),
)
}
);
check_column_metadata_invariants(column_metadatas, &region_metadata.column_metadatas)?;
regions_ids.push(region_metadata.region_id);
}
}
@@ -163,21 +147,10 @@ pub(crate) fn resolve_column_metadatas_with_latest(
let mut region_ids = vec![];
for region_metadata in region_metadatas {
if PartialRegionMetadata::from(region_metadata) != latest_column_metadatas {
let is_invariant_preserved = check_column_metadata_invariants(
check_column_metadata_invariants(
&latest_region_metadata.column_metadatas,
&region_metadata.column_metadatas,
);
ensure!(
is_invariant_preserved,
UnexpectedSnafu {
err_msg: format!(
"Column metadata invariants violated for region {}. Resolved column metadata: {:?}, region column metadata: {:?}",
region_metadata.region_id,
latest_column_metadatas.column_metadatas.iter().map(ColumnMetadataDisplay).collect::<Vec<_>>(),
region_metadata.column_metadatas.iter().map(ColumnMetadataDisplay).collect::<Vec<_>>()
)
}
);
)?;
region_ids.push(region_metadata.region_id);
}
}
@@ -239,7 +212,7 @@ pub(crate) fn build_column_metadata_from_table_info(
pub(crate) fn check_column_metadata_invariants(
new_column_metadatas: &[ColumnMetadata],
column_metadatas: &[ColumnMetadata],
) -> bool {
) -> Result<()> {
let new_primary_keys = new_column_metadatas
.iter()
.filter(|c| c.semantic_type == SemanticType::Tag)
@@ -252,22 +225,50 @@ pub(crate) fn check_column_metadata_invariants(
.map(|c| (c.column_schema.name.as_str(), c.column_id));

for (name, id) in old_primary_keys {
if new_primary_keys.get(name) != Some(&id) {
return false;
}
let column_id = new_primary_keys
.get(name)
.cloned()
.context(ColumnNotFoundSnafu {
column_name: name,
column_id: id,
})?;

ensure!(
column_id == id,
ColumnIdMismatchSnafu {
column_name: name,
expected_column_id: id,
actual_column_id: column_id,
}
);
}

let new_ts_column = new_column_metadatas
.iter()
.find(|c| c.semantic_type == SemanticType::Timestamp)
.map(|c| (c.column_schema.name.as_str(), c.column_id));
.map(|c| (c.column_schema.name.as_str(), c.column_id))
.context(UnexpectedSnafu {
err_msg: "Timestamp column not found in new column metadata",
})?;

let old_ts_column = column_metadatas
.iter()
.find(|c| c.semantic_type == SemanticType::Timestamp)
.map(|c| (c.column_schema.name.as_str(), c.column_id));
.map(|c| (c.column_schema.name.as_str(), c.column_id))
.context(UnexpectedSnafu {
err_msg: "Timestamp column not found in column metadata",
})?;
ensure!(
new_ts_column == old_ts_column,
TimestampMismatchSnafu {
expected_column_name: old_ts_column.0,
expected_column_id: old_ts_column.1,
actual_column_name: new_ts_column.0,
actual_column_id: new_ts_column.1,
}
);

new_ts_column == old_ts_column
Ok(())
}

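// Simplified mirror of the invariant that `check_column_metadata_invariants` enforces
// above, using plain standalone types instead of the real ColumnMetadata/SemanticType:
// every old Tag column must still exist with the same column id, and the Timestamp
// column must keep the same (name, id) pair. The real function reports ColumnNotFound,
// ColumnIdMismatch, and TimestampMismatch errors where this sketch returns strings.
#[derive(Clone, Copy, PartialEq)]
enum Semantic {
    Tag,
    Field,
    Timestamp,
}

struct Column {
    name: &'static str,
    id: u32,
    semantic: Semantic,
}

fn invariants_hold(new: &[Column], old: &[Column]) -> Result<(), String> {
    for o in old.iter().filter(|c| c.semantic == Semantic::Tag) {
        match new.iter().find(|n| n.name == o.name) {
            None => return Err(format!("tag column {} not found", o.name)),
            Some(n) if n.id != o.id => {
                return Err(format!("tag column {} id changed: {} -> {}", o.name, o.id, n.id))
            }
            _ => {}
        }
    }
    let ts = |cols: &[Column]| {
        cols.iter()
            .find(|c| c.semantic == Semantic::Timestamp)
            .map(|c| (c.name, c.id))
    };
    if ts(new) != ts(old) {
        return Err("timestamp column changed".to_string());
    }
    // Adding Field columns is allowed, which is exactly what the test hunks below rely on.
    Ok(())
}
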
/// Builds a [`RawTableMeta`] from the provided [`ColumnMetadata`]s.
@@ -406,6 +407,88 @@ pub(crate) fn need_update_logical_table_info(
table_info.meta.schema.column_schemas.len() != column_metadatas.len()
}

/// The result of waiting for inflight subprocedures.
pub struct PartialSuccessResult<'a> {
pub failed_procedures: Vec<&'a SubprocedureMeta>,
pub success_procedures: Vec<&'a SubprocedureMeta>,
}

/// The result of waiting for inflight subprocedures.
pub enum WaitForInflightSubproceduresResult<'a> {
Success(Vec<&'a SubprocedureMeta>),
PartialSuccess(PartialSuccessResult<'a>),
}

/// Wait for inflight subprocedures.
///
/// If `fail_fast` is true, the function will return an error if any subprocedure fails.
/// Otherwise, the function will continue waiting for all subprocedures to complete.
pub(crate) async fn wait_for_inflight_subprocedures<'a>(
procedure_ctx: &ProcedureContext,
subprocedures: &'a [SubprocedureMeta],
fail_fast: bool,
) -> Result<WaitForInflightSubproceduresResult<'a>> {
let mut receivers = Vec::with_capacity(subprocedures.len());
for subprocedure in subprocedures {
let procedure_id = subprocedure.procedure_id();
let receiver = procedure_ctx
.provider
.procedure_state_receiver(procedure_id)
.await
.context(ProcedureStateReceiverSnafu { procedure_id })?
.context(ProcedureStateReceiverNotFoundSnafu { procedure_id })?;
receivers.push((receiver, subprocedure));
}

let mut tasks = Vec::with_capacity(receivers.len());
for (receiver, subprocedure) in receivers.iter_mut() {
tasks.push(async move {
watcher::wait(receiver).await.inspect_err(|e| {
error!(e; "inflight subprocedure failed, parent procedure_id: {}, procedure: {}", procedure_ctx.procedure_id, subprocedure);
})
});
}

if fail_fast {
try_join_all(tasks).await.context(WaitProcedureSnafu)?;
return Ok(WaitForInflightSubproceduresResult::Success(
subprocedures.iter().collect(),
));
}

// If fail_fast is false, we need to wait for all subprocedures to complete.
let results = join_all(tasks).await;
let failed_procedures_num = results.iter().filter(|r| r.is_err()).count();
if failed_procedures_num == 0 {
return Ok(WaitForInflightSubproceduresResult::Success(
subprocedures.iter().collect(),
));
}
warn!(
"{} inflight subprocedures failed, total: {}, parent procedure_id: {}",
failed_procedures_num,
subprocedures.len(),
procedure_ctx.procedure_id
);

let mut failed_procedures = Vec::with_capacity(failed_procedures_num);
let mut success_procedures = Vec::with_capacity(subprocedures.len() - failed_procedures_num);
for (result, subprocedure) in results.into_iter().zip(subprocedures) {
if result.is_err() {
failed_procedures.push(subprocedure);
} else {
success_procedures.push(subprocedure);
}
}

Ok(WaitForInflightSubproceduresResult::PartialSuccess(
PartialSuccessResult {
failed_procedures,
success_procedures,
},
))
}

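// Self-contained sketch (assumes the `futures` and `tokio` crates) of the fail_fast split
// used by `wait_for_inflight_subprocedures` above: `try_join_all` aborts on the first
// failed subprocedure, while `join_all` waits for everything and lets the caller split
// the outcomes into failed/successful sets, the same shape as `PartialSuccessResult`
// that the `From<WaitForInflightSubproceduresResult>` metrics impls below consume.
// Plain `Result` futures stand in for real procedure watchers.
use futures::future::{join_all, try_join_all};

async fn wait(tasks: Vec<Result<&'static str, &'static str>>, fail_fast: bool) {
    let futs = tasks.into_iter().map(|t| async move { t });
    if fail_fast {
        // Stops polling as soon as one subprocedure reports an error.
        match try_join_all(futs).await {
            Ok(done) => println!("all {} subprocedures succeeded", done.len()),
            Err(e) => println!("aborted early: {e}"),
        }
    } else {
        // Waits for every subprocedure, then partitions the results.
        let results = join_all(futs).await;
        let (ok, failed): (Vec<_>, Vec<_>) = results.into_iter().partition(|r| r.is_ok());
        println!("succeeded: {}, failed: {}", ok.len(), failed.len());
    }
}

#[tokio::main]
async fn main() {
    wait(vec![Ok("a"), Err("b crashed"), Ok("c")], true).await;
    wait(vec![Ok("a"), Err("b crashed"), Ok("c")], false).await;
}
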
#[derive(Clone)]
pub struct Context {
pub node_manager: NodeManagerRef,
@@ -413,6 +496,446 @@ pub struct Context {
pub cache_invalidator: CacheInvalidatorRef,
}

/// Metadata for an inflight physical table subprocedure.
pub struct PhysicalTableMeta {
pub procedure_id: ProcedureId,
pub table_id: TableId,
pub table_name: TableName,
}

/// Metadata for an inflight logical table subprocedure.
pub struct LogicalTableMeta {
pub procedure_id: ProcedureId,
pub physical_table_id: TableId,
pub physical_table_name: TableName,
pub logical_tables: Vec<(TableId, TableName)>,
}

/// Metadata for an inflight database subprocedure.
pub struct ReconcileDatabaseMeta {
pub procedure_id: ProcedureId,
pub catalog: String,
pub schema: String,
}

/// The inflight subprocedure metadata.
pub enum SubprocedureMeta {
PhysicalTable(PhysicalTableMeta),
LogicalTable(LogicalTableMeta),
Database(ReconcileDatabaseMeta),
}

impl Display for SubprocedureMeta {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
SubprocedureMeta::PhysicalTable(meta) => {
write!(
f,
"ReconcilePhysicalTable(procedure_id: {}, table_id: {}, table_name: {})",
meta.procedure_id, meta.table_id, meta.table_name
)
}
SubprocedureMeta::LogicalTable(meta) => {
write!(
f,
"ReconcileLogicalTable(procedure_id: {}, physical_table_id: {}, physical_table_name: {}, logical_tables: {:?})",
meta.procedure_id, meta.physical_table_id, meta.physical_table_name, meta.logical_tables
)
}
SubprocedureMeta::Database(meta) => {
write!(
f,
"ReconcileDatabase(procedure_id: {}, catalog: {}, schema: {})",
meta.procedure_id, meta.catalog, meta.schema
)
}
}
}
}

impl SubprocedureMeta {
/// Creates a new logical table subprocedure metadata.
pub fn new_logical_table(
procedure_id: ProcedureId,
physical_table_id: TableId,
physical_table_name: TableName,
logical_tables: Vec<(TableId, TableName)>,
) -> Self {
Self::LogicalTable(LogicalTableMeta {
procedure_id,
physical_table_id,
physical_table_name,
logical_tables,
})
}

/// Creates a new physical table subprocedure metadata.
pub fn new_physical_table(
procedure_id: ProcedureId,
table_id: TableId,
table_name: TableName,
) -> Self {
Self::PhysicalTable(PhysicalTableMeta {
procedure_id,
table_id,
table_name,
})
}

/// Creates a new reconcile database subprocedure metadata.
pub fn new_reconcile_database(
procedure_id: ProcedureId,
catalog: String,
schema: String,
) -> Self {
Self::Database(ReconcileDatabaseMeta {
procedure_id,
catalog,
schema,
})
}

/// Returns the procedure id of the subprocedure.
pub fn procedure_id(&self) -> ProcedureId {
match self {
SubprocedureMeta::PhysicalTable(meta) => meta.procedure_id,
SubprocedureMeta::LogicalTable(meta) => meta.procedure_id,
SubprocedureMeta::Database(meta) => meta.procedure_id,
}
}

/// Returns the number of tables that will be reconciled.
pub fn table_num(&self) -> usize {
match self {
SubprocedureMeta::PhysicalTable(_) => 1,
SubprocedureMeta::LogicalTable(meta) => meta.logical_tables.len(),
SubprocedureMeta::Database(_) => 0,
}
}

/// Returns the number of databases that will be reconciled.
pub fn database_num(&self) -> usize {
match self {
SubprocedureMeta::Database(_) => 1,
_ => 0,
}
}
}

/// The metrics of reconciling catalog.
#[derive(Clone, Default)]
pub struct ReconcileCatalogMetrics {
pub succeeded_databases: usize,
pub failed_databases: usize,
}

impl AddAssign for ReconcileCatalogMetrics {
fn add_assign(&mut self, other: Self) {
self.succeeded_databases += other.succeeded_databases;
self.failed_databases += other.failed_databases;
}
}

impl Display for ReconcileCatalogMetrics {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"succeeded_databases: {}, failed_databases: {}",
self.succeeded_databases, self.failed_databases
)
}
}

impl From<WaitForInflightSubproceduresResult<'_>> for ReconcileCatalogMetrics {
fn from(result: WaitForInflightSubproceduresResult<'_>) -> Self {
match result {
WaitForInflightSubproceduresResult::Success(subprocedures) => ReconcileCatalogMetrics {
succeeded_databases: subprocedures.len(),
failed_databases: 0,
},
WaitForInflightSubproceduresResult::PartialSuccess(PartialSuccessResult {
failed_procedures,
success_procedures,
}) => {
let succeeded_databases = success_procedures
.iter()
.map(|subprocedure| subprocedure.database_num())
.sum();
let failed_databases = failed_procedures
.iter()
.map(|subprocedure| subprocedure.database_num())
.sum();
ReconcileCatalogMetrics {
succeeded_databases,
failed_databases,
}
}
}
}
}

/// The metrics of reconciling database.
#[derive(Clone, Default)]
pub struct ReconcileDatabaseMetrics {
pub succeeded_tables: usize,
pub failed_tables: usize,
pub succeeded_procedures: usize,
pub failed_procedures: usize,
}

impl Display for ReconcileDatabaseMetrics {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "succeeded_tables: {}, failed_tables: {}, succeeded_procedures: {}, failed_procedures: {}", self.succeeded_tables, self.failed_tables, self.succeeded_procedures, self.failed_procedures)
}
}

impl AddAssign for ReconcileDatabaseMetrics {
fn add_assign(&mut self, other: Self) {
self.succeeded_tables += other.succeeded_tables;
self.failed_tables += other.failed_tables;
self.succeeded_procedures += other.succeeded_procedures;
self.failed_procedures += other.failed_procedures;
}
}

impl From<WaitForInflightSubproceduresResult<'_>> for ReconcileDatabaseMetrics {
fn from(result: WaitForInflightSubproceduresResult<'_>) -> Self {
match result {
WaitForInflightSubproceduresResult::Success(subprocedures) => {
let table_num = subprocedures
.iter()
.map(|subprocedure| subprocedure.table_num())
.sum();
ReconcileDatabaseMetrics {
succeeded_procedures: subprocedures.len(),
failed_procedures: 0,
succeeded_tables: table_num,
failed_tables: 0,
}
}
WaitForInflightSubproceduresResult::PartialSuccess(PartialSuccessResult {
failed_procedures,
success_procedures,
}) => {
let succeeded_tables = success_procedures
.iter()
.map(|subprocedure| subprocedure.table_num())
.sum();
let failed_tables = failed_procedures
.iter()
.map(|subprocedure| subprocedure.table_num())
.sum();
ReconcileDatabaseMetrics {
succeeded_procedures: success_procedures.len(),
failed_procedures: failed_procedures.len(),
succeeded_tables,
failed_tables,
}
}
}
}
}

/// The metrics of reconciling logical tables.
#[derive(Clone)]
pub struct ReconcileLogicalTableMetrics {
pub start_time: Instant,
pub update_table_info_count: usize,
pub create_tables_count: usize,
pub column_metadata_consistent_count: usize,
pub column_metadata_inconsistent_count: usize,
}

impl Default for ReconcileLogicalTableMetrics {
fn default() -> Self {
Self {
start_time: Instant::now(),
update_table_info_count: 0,
create_tables_count: 0,
column_metadata_consistent_count: 0,
column_metadata_inconsistent_count: 0,
}
}
}

const CREATE_TABLES: &str = "create_tables";
const UPDATE_TABLE_INFO: &str = "update_table_info";
const COLUMN_METADATA_CONSISTENT: &str = "column_metadata_consistent";
const COLUMN_METADATA_INCONSISTENT: &str = "column_metadata_inconsistent";

impl ReconcileLogicalTableMetrics {
/// The total number of tables that have been reconciled.
pub fn total_table_count(&self) -> usize {
self.create_tables_count
+ self.column_metadata_consistent_count
+ self.column_metadata_inconsistent_count
}
}

impl Drop for ReconcileLogicalTableMetrics {
fn drop(&mut self) {
let procedure_name = ReconcileLogicalTablesProcedure::TYPE_NAME;
metrics::METRIC_META_RECONCILIATION_STATS
.with_label_values(&[procedure_name, metrics::TABLE_TYPE_LOGICAL, CREATE_TABLES])
.inc_by(self.create_tables_count as u64);
metrics::METRIC_META_RECONCILIATION_STATS
.with_label_values(&[
procedure_name,
metrics::TABLE_TYPE_LOGICAL,
UPDATE_TABLE_INFO,
])
.inc_by(self.update_table_info_count as u64);
metrics::METRIC_META_RECONCILIATION_STATS
.with_label_values(&[
procedure_name,
metrics::TABLE_TYPE_LOGICAL,
COLUMN_METADATA_CONSISTENT,
])
.inc_by(self.column_metadata_consistent_count as u64);
metrics::METRIC_META_RECONCILIATION_STATS
.with_label_values(&[
procedure_name,
metrics::TABLE_TYPE_LOGICAL,
COLUMN_METADATA_INCONSISTENT,
])
.inc_by(self.column_metadata_inconsistent_count as u64);
}
}

impl Display for ReconcileLogicalTableMetrics {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let elapsed = self.start_time.elapsed();
if self.create_tables_count > 0 {
write!(f, "create_tables_count: {}, ", self.create_tables_count)?;
}
if self.update_table_info_count > 0 {
write!(
f,
"update_table_info_count: {}, ",
self.update_table_info_count
)?;
}
if self.column_metadata_consistent_count > 0 {
write!(
f,
"column_metadata_consistent_count: {}, ",
self.column_metadata_consistent_count
)?;
}
if self.column_metadata_inconsistent_count > 0 {
write!(
f,
"column_metadata_inconsistent_count: {}, ",
self.column_metadata_inconsistent_count
)?;
}

write!(
f,
"total_table_count: {}, elapsed: {:?}",
self.total_table_count(),
elapsed
)
}
}

/// The result of resolving column metadata.
#[derive(Clone, Copy)]
pub enum ResolveColumnMetadataResult {
Consistent,
Inconsistent(ResolveStrategy),
}

impl Display for ResolveColumnMetadataResult {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
ResolveColumnMetadataResult::Consistent => write!(f, "Consistent"),
ResolveColumnMetadataResult::Inconsistent(strategy) => {
let strategy_str = strategy.as_ref();
write!(f, "Inconsistent({})", strategy_str)
}
}
}
}

/// The metrics of reconciling physical tables.
#[derive(Clone)]
pub struct ReconcileTableMetrics {
/// The start time of the reconciliation.
pub start_time: Instant,
/// The result of resolving column metadata.
pub resolve_column_metadata_result: Option<ResolveColumnMetadataResult>,
/// Whether the table info has been updated.
pub update_table_info: bool,
}

impl Drop for ReconcileTableMetrics {
fn drop(&mut self) {
if let Some(resolve_column_metadata_result) = self.resolve_column_metadata_result {
match resolve_column_metadata_result {
ResolveColumnMetadataResult::Consistent => {
metrics::METRIC_META_RECONCILIATION_STATS
.with_label_values(&[
ReconcileTableProcedure::TYPE_NAME,
metrics::TABLE_TYPE_PHYSICAL,
COLUMN_METADATA_CONSISTENT,
])
.inc();
}
ResolveColumnMetadataResult::Inconsistent(strategy) => {
metrics::METRIC_META_RECONCILIATION_STATS
.with_label_values(&[
ReconcileTableProcedure::TYPE_NAME,
metrics::TABLE_TYPE_PHYSICAL,
COLUMN_METADATA_INCONSISTENT,
])
.inc();
metrics::METRIC_META_RECONCILIATION_RESOLVED_COLUMN_METADATA
.with_label_values(&[strategy.as_ref()])
.inc();
}
}
}
if self.update_table_info {
metrics::METRIC_META_RECONCILIATION_STATS
.with_label_values(&[
ReconcileTableProcedure::TYPE_NAME,
metrics::TABLE_TYPE_PHYSICAL,
UPDATE_TABLE_INFO,
])
.inc();
}
}
}

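// Sketch of the flush-on-drop pattern the metrics structs above use: counts accumulate on
// the procedure's volatile context while it runs, and `Drop` pushes the totals into
// process-wide counters exactly once, whichever state the procedure exits from. The
// static atomic below stands in for the Prometheus METRIC_META_RECONCILIATION_STATS vec;
// it is not the real metric.
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Instant;

static UPDATE_TABLE_INFO_TOTAL: AtomicU64 = AtomicU64::new(0);

struct ProcedureMetrics {
    start_time: Instant,
    update_table_info_count: usize,
}

impl Default for ProcedureMetrics {
    fn default() -> Self {
        Self {
            start_time: Instant::now(),
            update_table_info_count: 0,
        }
    }
}

impl Drop for ProcedureMetrics {
    fn drop(&mut self) {
        // Flushed once when the owning context is dropped.
        UPDATE_TABLE_INFO_TOTAL.fetch_add(self.update_table_info_count as u64, Ordering::Relaxed);
    }
}

fn main() {
    {
        let mut metrics = ProcedureMetrics::default();
        metrics.update_table_info_count += 3;
        println!("elapsed: {:?}", metrics.start_time.elapsed());
    } // drop flushes here
    println!(
        "flushed total: {}",
        UPDATE_TABLE_INFO_TOTAL.load(Ordering::Relaxed)
    );
}
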
impl Default for ReconcileTableMetrics {
fn default() -> Self {
Self {
start_time: Instant::now(),
resolve_column_metadata_result: None,
update_table_info: false,
}
}
}

impl Display for ReconcileTableMetrics {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let elapsed = self.start_time.elapsed();
if let Some(resolve_column_metadata_result) = self.resolve_column_metadata_result {
write!(
f,
"resolve_column_metadata_result: {}, ",
resolve_column_metadata_result
)?;
}
write!(
f,
"update_table_info: {}, elapsed: {:?}",
self.update_table_info, elapsed
)
}
}

#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
@@ -665,10 +1188,7 @@ mod tests {
semantic_type: SemanticType::Field,
column_id: 3,
});
assert!(check_column_metadata_invariants(
&new_column_metadatas,
&column_metadatas
));
check_column_metadata_invariants(&new_column_metadatas, &column_metadatas).unwrap();
}

#[test]
@@ -676,18 +1196,12 @@ mod tests {
let column_metadatas = new_test_column_metadatas();
let mut new_column_metadatas = column_metadatas.clone();
new_column_metadatas.retain(|c| c.semantic_type != SemanticType::Timestamp);
assert!(!check_column_metadata_invariants(
&new_column_metadatas,
&column_metadatas
));
check_column_metadata_invariants(&new_column_metadatas, &column_metadatas).unwrap_err();

let column_metadatas = new_test_column_metadatas();
let mut new_column_metadatas = column_metadatas.clone();
new_column_metadatas.retain(|c| c.semantic_type != SemanticType::Tag);
assert!(!check_column_metadata_invariants(
&new_column_metadatas,
&column_metadatas
));
check_column_metadata_invariants(&new_column_metadatas, &column_metadatas).unwrap_err();
}

#[test]
@@ -700,10 +1214,7 @@ mod tests {
{
col.column_id = 100;
}
assert!(!check_column_metadata_invariants(
&new_column_metadatas,
&column_metadatas
));
check_column_metadata_invariants(&new_column_metadatas, &column_metadatas).unwrap_err();

let column_metadatas = new_test_column_metadatas();
let mut new_column_metadatas = column_metadatas.clone();
@@ -713,10 +1224,7 @@ mod tests {
{
col.column_id = 100;
}
assert!(!check_column_metadata_invariants(
&new_column_metadatas,
&column_metadatas
));
check_column_metadata_invariants(&new_column_metadatas, &column_metadatas).unwrap_err();
}

#[test]

@@ -280,6 +280,11 @@ pub enum LeaderState {
/// - The [`Region`] was planned to migrate to another [`Peer`].
#[serde(alias = "Downgraded")]
Downgrading,
/// The [`Region`] is in staging mode.
///
/// Disables checkpoint and compaction while maintaining write capability.
/// But data ingested during this period are not visible to the user (hence staging).
Staging,
}

impl RegionRoute {
@@ -294,6 +299,11 @@ impl RegionRoute {
matches!(self.leader_state, Some(LeaderState::Downgrading))
}

/// Returns true if the Leader [`Region`] is in staging mode.
pub fn is_leader_staging(&self) -> bool {
matches!(self.leader_state, Some(LeaderState::Staging))
}

/// Marks the Leader [`Region`] as [`RegionState::Downgrading`].
///
/// We should downgrade a [`Region`] before deactivating it:
@@ -310,6 +320,21 @@ impl RegionRoute {
self.leader_state = Some(LeaderState::Downgrading)
}

/// Sets the Leader [`Region`] to staging mode.
pub fn set_leader_staging(&mut self) {
self.leader_state = Some(LeaderState::Staging);
// Reset leader_down_since as it's specific to downgrading
self.leader_down_since = None;
}

/// Clears the leader staging state, returning to normal leader mode.
pub fn clear_leader_staging(&mut self) {
if self.leader_state == Some(LeaderState::Staging) {
self.leader_state = None;
self.leader_down_since = None;
}
}

/// Returns how long since the leader is in `Downgraded` state.
pub fn leader_down_millis(&self) -> Option<i64> {
self.leader_down_since

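// Minimal mirror of the staging transitions added to RegionRoute above, using standalone
// stand-in types (the real RegionRoute has more fields and its LeaderState has more
// variants). It shows that entering staging resets leader_down_since, and that clearing
// staging is a no-op when the leader is in some other state such as Downgrading.
#[derive(Clone, Copy, PartialEq, Debug)]
enum LeaderState {
    Downgrading,
    Staging,
}

#[derive(Default, Debug)]
struct Route {
    leader_state: Option<LeaderState>,
    leader_down_since: Option<i64>,
}

impl Route {
    fn is_leader_staging(&self) -> bool {
        matches!(self.leader_state, Some(LeaderState::Staging))
    }
    fn set_leader_staging(&mut self) {
        self.leader_state = Some(LeaderState::Staging);
        // leader_down_since only tracks downgrading, so it is cleared here.
        self.leader_down_since = None;
    }
    fn clear_leader_staging(&mut self) {
        if self.leader_state == Some(LeaderState::Staging) {
            self.leader_state = None;
            self.leader_down_since = None;
        }
    }
}

fn main() {
    let mut route = Route::default();
    route.set_leader_staging();
    assert!(route.is_leader_staging());
    route.clear_leader_staging();
    assert!(!route.is_leader_staging());

    // Clearing staging does not disturb a downgrading leader.
    route.leader_state = Some(LeaderState::Downgrading);
    route.clear_leader_staging();
    assert_eq!(route.leader_state, Some(LeaderState::Downgrading));
    println!("final state: {:?}", route.leader_state);
}
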
15  src/common/meta/src/stats.rs  Normal file
@@ -0,0 +1,15 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod topic;

634  src/common/meta/src/stats/topic.rs  Normal file
@@ -0,0 +1,634 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::{HashMap, VecDeque};
use std::sync::{Arc, RwLock};
use std::time::Duration;

use common_telemetry::{debug, warn};
use datafusion_common::HashSet;

use crate::datanode::TopicStat;
use crate::distributed_time_constants::{
TOPIC_STATS_REPORT_INTERVAL_SECS, TOPIC_STATS_RETENTION_SECS,
};
use crate::DatanodeId;

pub type TopicStatsRegistryRef = Arc<TopicStatsRegistry>;

/// Manages statistics for all topics across the cluster.
pub struct TopicStatsRegistry {
inner: RwLock<TopicStatsStore>,
}

impl Default for TopicStatsRegistry {
fn default() -> Self {
Self::new(
Duration::from_secs(TOPIC_STATS_RETENTION_SECS),
Duration::from_secs(TOPIC_STATS_REPORT_INTERVAL_SECS),
)
}
}

impl TopicStatsRegistry {
/// Creates a new topic stats registry.
///
/// # Panics
/// Panic if the window size is zero.
fn new(retention: Duration, window_size: Duration) -> Self {
let history_limit = (retention.as_secs() / window_size.as_secs()).max(10) as usize;
Self {
inner: RwLock::new(TopicStatsStore::new(history_limit, window_size)),
}
}

/// Adds a topic stat for a given datanode at a specific timestamp.
pub fn add_stat(&self, datanode_id: DatanodeId, stat: &TopicStat, millis_ts: i64) {
let mut inner = self.inner.write().unwrap();
inner.add_stat(datanode_id, stat, millis_ts);
}

/// Adds a list of topic stats for a given datanode at a specific timestamp.
pub fn add_stats(&self, datanode_id: DatanodeId, stats: &[TopicStat], millis_ts: i64) {
if stats.is_empty() {
return;
}

let mut inner = self.inner.write().unwrap();
for stat in stats {
inner.add_stat(datanode_id, stat, millis_ts);
}
}

/// Gets the calculated topic stat for a given topic.
pub fn get_calculated_topic_stat(
&self,
topic: &str,
period: Duration,
) -> Option<CalculatedTopicStat> {
let inner = self.inner.read().unwrap();
inner.get_calculated_topic_stat(topic, period)
}

/// Gets the latest entry id and timestamp for a given topic.
pub fn get_latest_entry_id(&self, topic: &str) -> Option<(u64, i64)> {
let inner = self.inner.read().unwrap();
inner.get_latest_entry_id(topic)
}
}

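// Worked example of the bucketing math used by `TopicStatsRegistry::new` and
// `TopicStatsStore::align_ts` above. The retention / report-interval values here are
// made up for illustration; the real ones come from `distributed_time_constants`.
use std::time::Duration;

fn history_limit(retention: Duration, window_size: Duration) -> usize {
    // Same formula as `TopicStatsRegistry::new`: keep at least 10 windows per topic.
    (retention.as_secs() / window_size.as_secs()).max(10) as usize
}

fn align_ts(millis_ts: i64) -> i64 {
    // Same alignment as `TopicStatsStore::align_ts`: truncate to whole seconds.
    (millis_ts / 1000) * 1000
}

fn main() {
    // e.g. 30 minutes of retention with 60-second report windows -> 30 buckets per topic.
    assert_eq!(history_limit(Duration::from_secs(1800), Duration::from_secs(60)), 30);
    // A short retention still keeps the 10-bucket floor.
    assert_eq!(history_limit(Duration::from_secs(120), Duration::from_secs(60)), 10);
    // A millisecond timestamp aligns down to the containing second.
    assert_eq!(align_ts(1_696_000_123_456), 1_696_000_123_000);
}
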
#[derive(Debug, PartialEq, Clone, Default)]
|
||||
struct HistoryTopicStat {
|
||||
/// The latest entry id of the topic.
|
||||
pub latest_entry_id: u64,
|
||||
/// The total size in bytes of records appended to the topic.
|
||||
pub record_size: u64,
|
||||
/// The total number of records appended to the topic.
|
||||
pub record_num: u64,
|
||||
/// The start timestamp of the stat.
|
||||
start_ts: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PartialTopicStat {
|
||||
/// The latest entry id of the topic.
|
||||
pub latest_entry_id: u64,
|
||||
/// The total size in bytes of records appended to the topic.
|
||||
pub record_size: u64,
|
||||
/// The total number of records appended to the topic.
|
||||
pub record_num: u64,
|
||||
/// The timestamp of the partial topic stat.
|
||||
pub timestamp: i64,
|
||||
}
|
||||
|
||||
struct ActiveBucket {
|
||||
buffer: HashMap<DatanodeId, HashMap<String, PartialTopicStat>>,
|
||||
start_ts: i64,
|
||||
window_size: Duration,
|
||||
}
|
||||
|
||||
impl ActiveBucket {
|
||||
fn new(timestamp: i64, window_sec: Duration) -> Self {
|
||||
Self {
|
||||
buffer: HashMap::new(),
|
||||
start_ts: timestamp,
|
||||
window_size: window_sec,
|
||||
}
|
||||
}
|
||||
|
||||
fn acceptable_ts(&self, millis_ts: i64) -> bool {
|
||||
let acceptable = millis_ts >= self.start_ts
|
||||
&& millis_ts < self.start_ts + self.window_size.as_millis() as i64;
|
||||
if !acceptable {
|
||||
debug!(
|
||||
"acceptable range: ts >= {} && ts < {}, ts: {}",
|
||||
self.start_ts,
|
||||
self.start_ts + self.window_size.as_millis() as i64,
|
||||
millis_ts
|
||||
);
|
||||
}
|
||||
acceptable
|
||||
}
|
||||
|
||||
/// Add a topic stat to the current topic stats.
|
||||
///
|
||||
/// Returns true if the topic stat is added successfully (stale stat will be ignored directly),
|
||||
/// false if the topic stat is out of the window.
|
||||
fn add_stat(&mut self, datanode_id: DatanodeId, stat: &TopicStat, millis_ts: i64) -> bool {
|
||||
if !self.acceptable_ts(millis_ts) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let datanode_stats = self.buffer.entry(datanode_id).or_default();
|
||||
|
||||
// Overwrite the topic stat if it already exists.
|
||||
if let Some(prev) = datanode_stats.get_mut(&stat.topic) {
|
||||
if millis_ts > prev.timestamp {
|
||||
*prev = PartialTopicStat {
|
||||
latest_entry_id: stat.latest_entry_id,
|
||||
record_size: stat.record_size,
|
||||
record_num: stat.record_num,
|
||||
timestamp: millis_ts,
|
||||
};
|
||||
} else {
|
||||
warn!(
|
||||
"Ignore stale topic stat for topic: {}, timestamp: {}, last recorded timestamp: {}",
|
||||
stat.topic, millis_ts, prev.timestamp
|
||||
);
|
||||
}
|
||||
} else {
|
||||
datanode_stats.insert(
|
||||
stat.topic.to_string(),
|
||||
PartialTopicStat {
|
||||
latest_entry_id: stat.latest_entry_id,
|
||||
record_size: stat.record_size,
|
||||
record_num: stat.record_num,
|
||||
timestamp: millis_ts,
|
||||
},
|
||||
);
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
fn merge(self) -> HashMap<String, HistoryTopicStat> {
|
||||
let all_topics = self
|
||||
.buffer
|
||||
.values()
|
||||
.flat_map(|stats| stats.keys())
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let mut output = HashMap::with_capacity(all_topics.len());
|
||||
for topic in all_topics {
|
||||
let stats = self
|
||||
.buffer
|
||||
.values()
|
||||
.flat_map(|stats| stats.get(topic))
|
||||
.collect::<Vec<_>>();
|
||||
debug!("stats: {:?} for topic: {}", stats, topic);
|
||||
let latest_entry_id = stats
|
||||
.iter()
|
||||
.map(|stat| stat.latest_entry_id)
|
||||
.max()
|
||||
.unwrap_or(0);
|
||||
let record_size = stats.iter().map(|stat| stat.record_size).sum::<u64>();
|
||||
let record_num = stats.iter().map(|stat| stat.record_num).sum::<u64>();
|
||||
|
||||
output.insert(
|
||||
topic.to_string(),
|
||||
HistoryTopicStat {
|
||||
latest_entry_id,
|
||||
record_size,
|
||||
record_num,
|
||||
start_ts: self.start_ts,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
/// Get the partial topic stat of a datanode.
|
||||
#[cfg(test)]
|
||||
fn get_stat(&self, datanode_id: DatanodeId, topic: &str) -> Option<&PartialTopicStat> {
|
||||
self.buffer
|
||||
.get(&datanode_id)
|
||||
.and_then(|stats| stats.get(topic))
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages topic statistics over time, including active and historical buckets.
|
||||
struct TopicStatsStore {
|
||||
/// The currently active bucket collecting stats.
|
||||
active_bucket: Option<ActiveBucket>,
|
||||
/// Historical merged buckets, grouped by topic.
|
||||
history_by_topic: HashMap<String, VecDeque<HistoryTopicStat>>,
|
||||
/// Maximum number of historical windows to keep per topic.
|
||||
history_limit: usize,
|
||||
/// Duration of each stats window in seconds.
|
||||
window_size: Duration,
|
||||
}
|
||||
|
||||
impl TopicStatsStore {
|
||||
/// Create a new topic stats.
|
||||
fn new(history_limit: usize, window_size: Duration) -> Self {
|
||||
Self {
|
||||
active_bucket: None,
|
||||
history_by_topic: HashMap::new(),
|
||||
history_limit,
|
||||
window_size,
|
||||
}
|
||||
}
|
||||
|
||||
/// Aligns the timestamp to the nearest second.
|
||||
fn align_ts(millis_ts: i64) -> i64 {
|
||||
(millis_ts / 1000) * 1000
|
||||
}
|
||||
|
||||
fn rotate_active_bucket(&mut self, start_ts: i64) {
|
||||
let aligned_ts = Self::align_ts(start_ts);
|
||||
if let Some(old_bucket) = self.active_bucket.take() {
|
||||
let merged = old_bucket.merge();
|
||||
for (topic, stat) in merged {
|
||||
debug!(
|
||||
"Merge current topic: {}, stats into history: {:?}",
|
||||
topic, stat
|
||||
);
|
||||
let history = self.history_by_topic.entry(topic).or_default();
|
||||
history.push_back(stat);
|
||||
if history.len() > self.history_limit {
|
||||
history.pop_front();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.active_bucket = Some(ActiveBucket::new(aligned_ts, self.window_size));
|
||||
}
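Not part of the diff: the bounded-history rule used by `rotate_active_bucket`, isolated into a small generic helper sketch.

use std::collections::VecDeque;

/// Pushes `item`, then drops the oldest entry once the history exceeds `limit`.
fn push_bounded<T>(history: &mut VecDeque<T>, item: T, limit: usize) {
    history.push_back(item);
    if history.len() > limit {
        history.pop_front();
    }
}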
|
||||
|
||||
/// Adds a topic stat for a given datanode at a specific timestamp.
|
||||
fn add_stat(&mut self, datanode_id: DatanodeId, stat: &TopicStat, millis_ts: i64) {
|
||||
let aligned_ts = Self::align_ts(millis_ts);
|
||||
|
||||
let need_rotate = match &self.active_bucket {
|
||||
Some(bucket) => !bucket.acceptable_ts(aligned_ts),
|
||||
None => true,
|
||||
};
|
||||
|
||||
if need_rotate {
|
||||
debug!("Rotate active bucket at ts: {}", aligned_ts);
|
||||
self.rotate_active_bucket(aligned_ts);
|
||||
}
|
||||
|
||||
// Safety: the active bucket was initialized (or rotated) in the previous step.
let active_bucket = self.active_bucket.as_mut().unwrap();
// Evaluate the call unconditionally: `debug_assert!(expr)` does not evaluate `expr` in
// release builds, which would silently drop the stat.
let added = active_bucket.add_stat(datanode_id, stat, millis_ts);
debug_assert!(added);
|
||||
}
|
||||
|
||||
/// Gets the calculated topic stat for a given topic.
|
||||
fn get_calculated_topic_stat(
|
||||
&self,
|
||||
topic: &str,
|
||||
period: Duration,
|
||||
) -> Option<CalculatedTopicStat> {
|
||||
let stats = self.history_by_topic.get(topic)?;
|
||||
calculate_topic_stat(stats, period)
|
||||
}
|
||||
|
||||
/// Gets the latest entry id and timestamp for a given topic.
|
||||
fn get_latest_entry_id(&self, topic: &str) -> Option<(u64, i64)> {
|
||||
self.history_by_topic.get(topic).and_then(|stats| {
|
||||
stats
|
||||
.back()
|
||||
.map(|stat| (stat.latest_entry_id, stat.start_ts))
|
||||
})
|
||||
}
|
||||
}
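Not part of the diff: a hypothetical driver showing the intended call pattern of `TopicStatsStore`, assuming the module context above (where `TopicStat` and `Duration` are already in scope). The topic name, datanode id, and counter values are made up.

fn drive(store: &mut TopicStatsStore, now_ms: i64) {
    let stat = TopicStat {
        topic: "wal_topic_0".to_string(),
        latest_entry_id: 42,
        record_size: 4096,
        record_num: 8,
    };
    // Each heartbeat lands in the active bucket; a timestamp outside the current
    // window rotates the bucket into `history_by_topic` first.
    store.add_stat(1, &stat, now_ms);
    // Once at least two history windows cover the requested period, an average is available.
    if let Some(calc) = store.get_calculated_topic_stat("wal_topic_0", Duration::from_secs(300)) {
        println!("avg record size: {} bytes", calc.avg_record_size);
    }
}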

/// The calculated topic stat.
///
/// `avg_record_size` is the average size of the topic's records over the window;
/// `start_ts` and `end_ts` mark the start and end of that window.
pub struct CalculatedTopicStat {
pub avg_record_size: usize,
pub start_ts: i64,
pub end_ts: i64,
}
|
||||
|
||||
/// Calculates the average record size for a topic within a specified time window based on recent merged statistics.
|
||||
///
|
||||
/// Returns `Some(CalculatedTopicStat)` if the calculation is successful, or `None` if insufficient data is available.
|
||||
fn calculate_topic_stat(
|
||||
stats: &VecDeque<HistoryTopicStat>,
|
||||
period: Duration,
|
||||
) -> Option<CalculatedTopicStat> {
|
||||
if stats.len() < 2 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let last_stat = stats.back().unwrap();
|
||||
let first_stat = stats.front().unwrap();
|
||||
// Not enough stats data.
|
||||
if first_stat.start_ts + period.as_millis() as i64 > last_stat.start_ts {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Scan from newest to oldest for the first stat older than `last_stat.start_ts - period`.
// TODO(weny): Use binary search to find the target stat.
|
||||
let target_stat = stats
|
||||
.iter()
|
||||
.rev()
|
||||
.skip(1)
|
||||
.find(|stat| (stat.start_ts + period.as_millis() as i64) < last_stat.start_ts);
|
||||
|
||||
let target_stat = target_stat?;
|
||||
|
||||
// The target stat's record size and record num should be less than the last stat's record size and record num.
|
||||
if target_stat.record_size > last_stat.record_size
|
||||
|| target_stat.record_num > last_stat.record_num
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
// Safety: the last stat's record size and record num must be greater than the target stat's record size and record num.
|
||||
let record_size = last_stat.record_size - target_stat.record_size;
|
||||
let record_num = last_stat.record_num - target_stat.record_num;
|
||||
let avg_record_size = record_size.checked_div(record_num).unwrap_or(0) as usize;
|
||||
|
||||
let start_ts = target_stat.start_ts;
|
||||
let end_ts = last_stat.start_ts;
|
||||
Some(CalculatedTopicStat {
|
||||
avg_record_size,
|
||||
start_ts,
|
||||
end_ts,
|
||||
})
|
||||
}
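Not part of the diff: the averaging rule above, restated as a standalone program with simplified local types (not the crate's own). The numbers mirror `test_target_stat_found` below.

#[derive(Debug)]
struct Point {
    start_ts: i64,    // window start, in milliseconds
    record_size: u64, // cumulative bytes written to the topic
    record_num: u64,  // cumulative record count
}

fn avg_record_size(older: &Point, newer: &Point) -> Option<(usize, i64, i64)> {
    // Counters are cumulative, so a decrease means a reset upstream: give up.
    if older.record_size > newer.record_size || older.record_num > newer.record_num {
        return None;
    }
    let size = newer.record_size - older.record_size;
    let num = newer.record_num - older.record_num;
    Some((size.checked_div(num).unwrap_or(0) as usize, older.start_ts, newer.start_ts))
}

fn main() {
    let older = Point { start_ts: 3000, record_size: 200, record_num: 4 };
    let newer = Point { start_ts: 6000, record_size: 600, record_num: 6 };
    // (600 - 200) / (6 - 4) = 200 bytes per record on average.
    assert_eq!(avg_record_size(&older, &newer), Some((200, 3000, 6000)));
}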
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::VecDeque;
|
||||
|
||||
use common_time::util::current_time_millis;
|
||||
|
||||
use super::*;
|
||||
use crate::datanode::TopicStat;
|
||||
|
||||
fn merged_stat(ts: i64, record_size: u64, record_num: u64) -> HistoryTopicStat {
|
||||
HistoryTopicStat {
|
||||
start_ts: ts,
|
||||
record_size,
|
||||
record_num,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_stats() {
|
||||
let stats: VecDeque<HistoryTopicStat> = VecDeque::new();
|
||||
assert!(calculate_topic_stat(&stats, Duration::from_secs(10)).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_stat() {
|
||||
let mut stats = VecDeque::new();
|
||||
stats.push_back(merged_stat(1000, 100, 2));
|
||||
assert!(calculate_topic_stat(&stats, Duration::from_secs(10)).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_no_target_stat_found() {
|
||||
let mut stats = VecDeque::new();
|
||||
stats.push_back(merged_stat(1000, 100, 2));
|
||||
stats.push_back(merged_stat(2000, 200, 4));
|
||||
// The period is larger than the span covered by the stats, so no target stat can be found.
|
||||
assert!(calculate_topic_stat(&stats, Duration::from_secs(100)).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_target_stat_found() {
|
||||
let mut stats = VecDeque::new();
|
||||
stats.push_back(merged_stat(1000, 100, 2));
|
||||
stats.push_back(merged_stat(3000, 200, 4));
|
||||
stats.push_back(merged_stat(6000, 600, 6));
|
||||
let result = calculate_topic_stat(&stats, Duration::from_secs(2));
|
||||
assert!(result.is_some());
|
||||
let stat = result.unwrap();
|
||||
assert_eq!(stat.avg_record_size, 200); // (600 - 200) / (6 - 4)
|
||||
assert_eq!(stat.start_ts, 3000);
|
||||
assert_eq!(stat.end_ts, 6000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_target_stat_decreasing() {
|
||||
let mut stats = VecDeque::new();
|
||||
stats.push_back(merged_stat(1000, 100, 2));
|
||||
stats.push_back(merged_stat(3000, 200, 4));
|
||||
stats.push_back(merged_stat(6000, 100, 1)); // Reset or something wrong
|
||||
let result = calculate_topic_stat(&stats, Duration::from_secs(2));
|
||||
assert!(result.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multiple_stats_target_found() {
|
||||
let mut stats = VecDeque::new();
|
||||
stats.push_back(merged_stat(1000, 100, 2));
|
||||
stats.push_back(merged_stat(2000, 200, 4));
|
||||
stats.push_back(merged_stat(4000, 400, 8));
|
||||
stats.push_back(merged_stat(8000, 800, 16));
|
||||
let result = calculate_topic_stat(&stats, Duration::from_secs(3));
|
||||
assert!(result.is_some());
|
||||
let stat = result.unwrap();
|
||||
assert_eq!(stat.avg_record_size, 50); // (800 - 400) / (16 - 8)
|
||||
assert_eq!(stat.start_ts, 4000);
|
||||
assert_eq!(stat.end_ts, 8000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_active_bucket() {
|
||||
let ts = current_time_millis();
|
||||
let window_size = Duration::from_secs(3);
|
||||
let mut active_bucket = ActiveBucket::new(ts, window_size);
|
||||
|
||||
assert!(active_bucket.add_stat(
|
||||
0,
|
||||
&TopicStat {
|
||||
topic: "test".to_string(),
|
||||
latest_entry_id: 1,
|
||||
record_size: 256,
|
||||
record_num: 1,
|
||||
},
|
||||
ts + 10,
|
||||
));
|
||||
|
||||
assert!(active_bucket.add_stat(
|
||||
1,
|
||||
&TopicStat {
|
||||
topic: "test".to_string(),
|
||||
latest_entry_id: 10,
|
||||
record_size: 5120,
|
||||
record_num: 10,
|
||||
},
|
||||
ts + 10,
|
||||
));
|
||||
|
||||
assert!(active_bucket.add_stat(
|
||||
0,
|
||||
&TopicStat {
|
||||
topic: "test1".to_string(),
|
||||
latest_entry_id: 2,
|
||||
record_size: 128,
|
||||
record_num: 2,
|
||||
},
|
||||
ts + 9,
|
||||
));
|
||||
|
||||
// Out of the window.
|
||||
assert!(!active_bucket.add_stat(
|
||||
0,
|
||||
&TopicStat {
|
||||
topic: "test".to_string(),
|
||||
latest_entry_id: 2,
|
||||
record_size: 2,
|
||||
record_num: 2,
|
||||
},
|
||||
ts + window_size.as_millis() as i64 + 1,
|
||||
));
|
||||
|
||||
// Out of the window.
|
||||
assert!(!active_bucket.add_stat(
|
||||
0,
|
||||
&TopicStat {
|
||||
topic: "test".to_string(),
|
||||
latest_entry_id: 2,
|
||||
record_size: 2,
|
||||
record_num: 2,
|
||||
},
|
||||
ts - 1
|
||||
));
|
||||
|
||||
// Overwrite the topic stat if the timestamp is larger.
|
||||
assert!(active_bucket.add_stat(
|
||||
0,
|
||||
&TopicStat {
|
||||
topic: "test".to_string(),
|
||||
latest_entry_id: 3,
|
||||
record_size: 1024,
|
||||
record_num: 3,
|
||||
},
|
||||
ts + 11,
|
||||
));
|
||||
assert_eq!(
|
||||
active_bucket.get_stat(0, "test").unwrap().latest_entry_id,
|
||||
3
|
||||
);
|
||||
|
||||
// Ignore stale topic stat.
|
||||
assert!(active_bucket.add_stat(
|
||||
0,
|
||||
&TopicStat {
|
||||
topic: "test".to_string(),
|
||||
latest_entry_id: 2,
|
||||
record_size: 512,
|
||||
record_num: 2,
|
||||
},
|
||||
ts + 9,
|
||||
));
|
||||
|
||||
assert_eq!(
|
||||
active_bucket.get_stat(0, "test").unwrap().latest_entry_id,
|
||||
3
|
||||
);
|
||||
|
||||
let merged = active_bucket.merge();
|
||||
assert_eq!(merged.len(), 2);
|
||||
assert_eq!(merged.get("test").unwrap().latest_entry_id, 10);
|
||||
assert_eq!(merged.get("test").unwrap().record_size, 5120 + 1024);
|
||||
assert_eq!(merged.get("test").unwrap().record_num, 10 + 3);
|
||||
|
||||
assert_eq!(merged.get("test1").unwrap().latest_entry_id, 2);
|
||||
assert_eq!(merged.get("test1").unwrap().record_size, 128);
|
||||
assert_eq!(merged.get("test1").unwrap().record_num, 2);
|
||||
assert_eq!(merged.get("test1").unwrap().start_ts, ts);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_topic_stats() {
|
||||
let topic_name = "test";
|
||||
let window_size = Duration::from_secs(60);
|
||||
let mut topic_stats = TopicStatsStore::new(5, window_size);
|
||||
let ts = TopicStatsStore::align_ts(current_time_millis());
|
||||
debug!("add stat at ts: {}", ts);
|
||||
topic_stats.add_stat(
|
||||
0,
|
||||
&TopicStat {
|
||||
topic: topic_name.to_string(),
|
||||
latest_entry_id: 1,
|
||||
record_size: 1024,
|
||||
record_num: 1,
|
||||
},
|
||||
ts,
|
||||
);
|
||||
|
||||
debug!("add stat at ts: {}", ts + window_size.as_millis() as i64);
|
||||
topic_stats.add_stat(
|
||||
1,
|
||||
&TopicStat {
|
||||
topic: topic_name.to_string(),
|
||||
latest_entry_id: 4,
|
||||
record_size: 4096,
|
||||
record_num: 4,
|
||||
},
|
||||
ts + window_size.as_millis() as i64 - 1,
|
||||
);
|
||||
|
||||
topic_stats.add_stat(
|
||||
1,
|
||||
&TopicStat {
|
||||
topic: "another_topic".to_string(),
|
||||
latest_entry_id: 4,
|
||||
record_size: 4096,
|
||||
record_num: 4,
|
||||
},
|
||||
ts + window_size.as_millis() as i64 - 1,
|
||||
);
|
||||
|
||||
debug!(
|
||||
"add stat at ts: {}",
|
||||
ts + window_size.as_millis() as i64 + 1
|
||||
);
|
||||
// Add a stat that is out of the window.
|
||||
topic_stats.add_stat(
|
||||
1,
|
||||
&TopicStat {
|
||||
topic: topic_name.to_string(),
|
||||
latest_entry_id: 5,
|
||||
record_size: 8192,
|
||||
record_num: 5,
|
||||
},
|
||||
ts + window_size.as_millis() as i64,
|
||||
);
|
||||
|
||||
let history = topic_stats.history_by_topic.get(topic_name).unwrap();
|
||||
assert_eq!(history.len(), 1);
|
||||
assert_eq!(
|
||||
history[0],
|
||||
HistoryTopicStat {
|
||||
latest_entry_id: 4,
|
||||
record_size: 1024 + 4096,
|
||||
record_num: 1 + 4,
|
||||
start_ts: ts,
|
||||
}
|
||||
);
|
||||
assert!(topic_stats.active_bucket.is_some());
|
||||
}
|
||||
}
|
||||
@@ -251,11 +251,11 @@ pub async fn test_kafka_topic_pool(
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
/// Skip the test if the environment variable `GT_KAFKA_ENDPOINTS` is not set.
|
||||
/// Skip the test if the environment variable `GT_POSTGRES_ENDPOINTS` is not set.
|
||||
///
|
||||
/// The format of the environment variable is:
|
||||
/// ```
|
||||
/// GT_KAFKA_ENDPOINTS=localhost:9092,localhost:9093
|
||||
/// GT_POSTGRES_ENDPOINTS=localhost:9092,localhost:9093
|
||||
/// ```
|
||||
macro_rules! maybe_skip_postgres_integration_test {
|
||||
() => {
|
||||
@@ -267,11 +267,11 @@ macro_rules! maybe_skip_postgres_integration_test {
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
/// Skip the test if the environment variable `GT_KAFKA_ENDPOINTS` is not set.
|
||||
/// Skip the test if the environment variable `GT_MYSQL_ENDPOINTS` is not set.
|
||||
///
|
||||
/// The format of the environment variable is:
|
||||
/// ```
|
||||
/// GT_KAFKA_ENDPOINTS=localhost:9092,localhost:9093
|
||||
/// GT_MYSQL_ENDPOINTS=localhost:9092,localhost:9093
|
||||
/// ```
|
||||
macro_rules! maybe_skip_mysql_integration_test {
|
||||
() => {
|
||||
|
||||
@@ -25,7 +25,8 @@ use snafu::ResultExt;
|
||||
|
||||
use crate::error::{
|
||||
BuildKafkaClientSnafu, BuildKafkaCtrlClientSnafu, CreateKafkaWalTopicSnafu,
|
||||
KafkaGetOffsetSnafu, KafkaPartitionClientSnafu, ProduceRecordSnafu, Result, TlsConfigSnafu,
|
||||
KafkaGetOffsetSnafu, KafkaPartitionClientSnafu, ProduceRecordSnafu, ResolveKafkaEndpointSnafu,
|
||||
Result, TlsConfigSnafu,
|
||||
};
|
||||
|
||||
// Each topic only has one partition for now.
|
||||
@@ -208,10 +209,10 @@ impl KafkaTopicCreator {
|
||||
/// Builds a kafka [Client](rskafka::client::Client).
|
||||
pub async fn build_kafka_client(connection: &KafkaConnectionConfig) -> Result<Client> {
|
||||
// Builds a Kafka controller client for creating topics.
|
||||
let mut builder = ClientBuilder::new(connection.broker_endpoints.clone())
|
||||
.backoff_config(DEFAULT_BACKOFF_CONFIG)
|
||||
.connect_timeout(Some(connection.connect_timeout))
|
||||
.timeout(Some(connection.timeout));
|
||||
let broker_endpoints = common_wal::resolve_to_ipv4(&connection.broker_endpoints)
|
||||
.await
|
||||
.context(ResolveKafkaEndpointSnafu)?;
|
||||
let mut builder = ClientBuilder::new(broker_endpoints).backoff_config(DEFAULT_BACKOFF_CONFIG);
|
||||
if let Some(sasl) = &connection.sasl {
|
||||
builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
|
||||
};
|
||||
|
||||
@@ -260,21 +260,20 @@ impl ErrorExt for Error {
|
||||
| Error::PutPoison { source, .. }
|
||||
| Error::DeletePoison { source, .. }
|
||||
| Error::GetPoison { source, .. }
|
||||
| Error::CheckStatus { source, .. } => source.status_code(),
|
||||
| Error::CheckStatus { source, .. }
|
||||
| Error::RetryLater { source, .. } => source.status_code(),
|
||||
|
||||
Error::ToJson { .. }
|
||||
| Error::DeleteState { .. }
|
||||
| Error::FromJson { .. }
|
||||
| Error::WaitWatcher { .. }
|
||||
| Error::RetryLater { .. }
|
||||
| Error::RollbackProcedureRecovered { .. }
|
||||
| Error::TooManyRunningProcedures { .. }
|
||||
| Error::PoisonKeyNotDefined { .. } => StatusCode::Internal,
|
||||
| Error::WaitWatcher { .. } => StatusCode::Internal,
|
||||
|
||||
Error::RetryTimesExceeded { .. }
|
||||
| Error::RollbackTimesExceeded { .. }
|
||||
| Error::ManagerNotStart { .. }
|
||||
| Error::ManagerPasued { .. } => StatusCode::IllegalState,
|
||||
| Error::ManagerPasued { .. }
|
||||
| Error::TooManyRunningProcedures { .. }
|
||||
| Error::RollbackProcedureRecovered { .. } => StatusCode::IllegalState,
|
||||
|
||||
Error::RollbackNotSupported { .. } => StatusCode::Unsupported,
|
||||
Error::LoaderConflict { .. } | Error::DuplicateProcedure { .. } => {
|
||||
@@ -283,7 +282,8 @@ impl ErrorExt for Error {
|
||||
Error::ProcedurePanic { .. }
|
||||
| Error::ParseSegmentKey { .. }
|
||||
| Error::Unexpected { .. }
|
||||
| &Error::ProcedureNotFound { .. } => StatusCode::Unexpected,
|
||||
| &Error::ProcedureNotFound { .. }
|
||||
| Error::PoisonKeyNotDefined { .. } => StatusCode::Unexpected,
|
||||
Error::ProcedureExec { source, .. } => source.status_code(),
|
||||
Error::StartRemoveOutdatedMetaTask { source, .. }
|
||||
| Error::StopRemoveOutdatedMetaTask { source, .. } => source.status_code(),
|
||||
|
||||
@@ -8,4 +8,5 @@ license.workspace = true
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
serde.workspace = true
|
||||
strum.workspace = true
|
||||
|
||||
@@ -12,11 +12,14 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use strum::{AsRefStr, Display, EnumString};
|
||||
|
||||
/// Defines the read preference for frontend route operations,
|
||||
/// determining whether to read from the region leader or follower.
|
||||
#[derive(Debug, Clone, Copy, Default, EnumString, Display, AsRefStr, PartialEq, Eq)]
|
||||
#[derive(
|
||||
Debug, Clone, Copy, Default, EnumString, Display, AsRefStr, PartialEq, Serialize, Deserialize,
|
||||
)]
|
||||
pub enum ReadPreference {
|
||||
#[default]
|
||||
// Reads all operations from the region leader. This is the default mode.
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(let_chains)]
|
||||
#![feature(duration_constructors)]
|
||||
|
||||
pub mod logging;
|
||||
mod macros;
|
||||
|
||||
@@ -103,14 +103,15 @@ pub struct SlowQueryOptions {
|
||||
|
||||
/// The threshold of slow queries.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub threshold: Option<Duration>,
|
||||
pub threshold: Duration,
|
||||
|
||||
/// The sample ratio of slow queries.
|
||||
pub sample_ratio: Option<f64>,
|
||||
pub sample_ratio: f64,
|
||||
|
||||
/// The table TTL of `slow_queries` system table. Default is "30d".
|
||||
/// The table TTL of `slow_queries` system table. Default is "90d".
|
||||
/// It's used when `record_type` is `SystemTable`.
|
||||
pub ttl: Option<String>,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub ttl: Duration,
|
||||
}
|
||||
|
||||
impl Default for SlowQueryOptions {
|
||||
@@ -118,9 +119,9 @@ impl Default for SlowQueryOptions {
|
||||
Self {
|
||||
enable: true,
|
||||
record_type: SlowQueriesRecordType::SystemTable,
|
||||
threshold: Some(Duration::from_secs(30)),
|
||||
sample_ratio: Some(1.0),
|
||||
ttl: Some("30d".to_string()),
|
||||
threshold: Duration::from_secs(30),
|
||||
sample_ratio: 1.0,
|
||||
ttl: Duration::from_days(90),
|
||||
}
|
||||
}
|
||||
}
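Not part of the diff: a sketch of what the new defaults amount to, written as assertions against the `Default` impl above. `Duration::from_days` relies on the `duration_constructors` nightly feature enabled in the earlier hunk, and `Duration` is assumed to already be in scope in this module.

#[test]
fn slow_query_defaults() {
    let opts = SlowQueryOptions::default();
    assert!(opts.enable);
    assert_eq!(opts.record_type, SlowQueriesRecordType::SystemTable);
    assert_eq!(opts.threshold, Duration::from_secs(30));
    assert_eq!(opts.sample_ratio, 1.0);
    assert_eq!(opts.ttl, Duration::from_days(90));
}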
|
||||
@@ -128,7 +129,9 @@ impl Default for SlowQueryOptions {
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, Copy, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SlowQueriesRecordType {
|
||||
/// Record the slow query in the system table.
|
||||
SystemTable,
|
||||
/// Record the slow query in a specific logs file.
|
||||
Log,
|
||||
}
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ use opentelemetry::propagation::TextMapPropagator;
|
||||
use opentelemetry_sdk::propagation::TraceContextPropagator;
|
||||
use tracing_opentelemetry::OpenTelemetrySpanExt;
|
||||
|
||||
// An wrapper for `Futures` that provides tracing instrument adapters.
|
||||
// An wapper for `Futures` that provides tracing instrument adapters.
|
||||
pub trait FutureExt: std::future::Future + Sized {
|
||||
fn trace(self, span: tracing::span::Span) -> tracing::instrument::Instrumented<Self>;
|
||||
}
|
||||
|
||||
@@ -189,8 +189,6 @@ mod tests {
|
||||
client_cert_path: None,
|
||||
client_key_path: None,
|
||||
}),
|
||||
connect_timeout: Duration::from_secs(3),
|
||||
timeout: Duration::from_secs(3),
|
||||
},
|
||||
kafka_topic: KafkaTopicConfig {
|
||||
num_topics: 32,
|
||||
@@ -223,8 +221,6 @@ mod tests {
|
||||
client_cert_path: None,
|
||||
client_key_path: None,
|
||||
}),
|
||||
connect_timeout: Duration::from_secs(3),
|
||||
timeout: Duration::from_secs(3),
|
||||
},
|
||||
max_batch_bytes: ReadableSize::mb(1),
|
||||
consumer_wait_timeout: Duration::from_millis(100),
|
||||
|
||||
@@ -161,12 +161,6 @@ pub struct KafkaConnectionConfig {
|
||||
pub sasl: Option<KafkaClientSasl>,
|
||||
/// Client TLS config
|
||||
pub tls: Option<KafkaClientTls>,
|
||||
/// The connect timeout for kafka client.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub connect_timeout: Duration,
|
||||
/// The timeout for kafka client.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub timeout: Duration,
|
||||
}
|
||||
|
||||
impl Default for KafkaConnectionConfig {
|
||||
@@ -175,8 +169,6 @@ impl Default for KafkaConnectionConfig {
|
||||
broker_endpoints: vec![BROKER_ENDPOINT.to_string()],
|
||||
sasl: None,
|
||||
tls: None,
|
||||
connect_timeout: Duration::from_secs(3),
|
||||
timeout: Duration::from_secs(3),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,26 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_telemetry::warn;
|
||||
use futures_util::future::BoxFuture;
|
||||
|
||||
pub async fn run_test_with_kafka_wal<F>(test: F)
|
||||
where
|
||||
F: FnOnce(Vec<String>) -> BoxFuture<'static, ()>,
|
||||
{
|
||||
let Ok(endpoints) = std::env::var("GT_KAFKA_ENDPOINTS") else {
|
||||
warn!("The endpoints is empty, skipping the test");
|
||||
return;
|
||||
};
|
||||
|
||||
let endpoints = endpoints
|
||||
.split(',')
|
||||
.map(|s| s.trim().to_string())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
test(endpoints).await
|
||||
}
|
||||
|
||||
/// Get the kafka endpoints from the environment variable `GT_KAFKA_ENDPOINTS`.
|
||||
///
|
||||
/// The format of the environment variable is:
|
||||
|
||||
@@ -16,7 +16,6 @@ api.workspace = true
|
||||
arrow-flight.workspace = true
|
||||
async-trait.workspace = true
|
||||
bytes.workspace = true
|
||||
catalog.workspace = true
|
||||
client.workspace = true
|
||||
common-base.workspace = true
|
||||
common-config.workspace = true
|
||||
|
||||
@@ -18,11 +18,11 @@ use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use catalog::memory::MemoryCatalogManager;
|
||||
use common_base::Plugins;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
|
||||
use common_meta::cache::{LayeredCacheRegistry, SchemaCacheRef, TableSchemaCacheRef};
|
||||
use common_meta::datanode::TopicStatsReporter;
|
||||
use common_meta::key::datanode_table::{DatanodeTableManager, DatanodeTableValue};
|
||||
use common_meta::key::runtime_switch::RuntimeSwitchManager;
|
||||
use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef};
|
||||
@@ -44,7 +44,7 @@ use mito2::config::MitoConfig;
|
||||
use mito2::engine::{MitoEngine, MitoEngineBuilder};
|
||||
use object_store::manager::{ObjectStoreManager, ObjectStoreManagerRef};
|
||||
use object_store::util::normalize_dir;
|
||||
use query::dummy_catalog::TableProviderFactoryRef;
|
||||
use query::dummy_catalog::{DummyCatalogManager, TableProviderFactoryRef};
|
||||
use query::QueryEngineFactory;
|
||||
use servers::export_metrics::ExportMetricsTask;
|
||||
use servers::server::ServerHandlers;
|
||||
@@ -163,6 +163,7 @@ pub struct DatanodeBuilder {
|
||||
meta_client: Option<MetaClientRef>,
|
||||
kv_backend: KvBackendRef,
|
||||
cache_registry: Option<Arc<LayeredCacheRegistry>>,
|
||||
topic_stats_reporter: Option<Box<dyn TopicStatsReporter>>,
|
||||
#[cfg(feature = "enterprise")]
|
||||
extension_range_provider_factory: Option<mito2::extension::BoxedExtensionRangeProviderFactory>,
|
||||
}
|
||||
@@ -178,6 +179,7 @@ impl DatanodeBuilder {
|
||||
cache_registry: None,
|
||||
#[cfg(feature = "enterprise")]
|
||||
extension_range_provider_factory: None,
|
||||
topic_stats_reporter: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -377,7 +379,7 @@ impl DatanodeBuilder {
|
||||
|
||||
let query_engine_factory = QueryEngineFactory::new_with_plugins(
|
||||
// query engine in datanode only executes plan with resolved table source.
|
||||
MemoryCatalogManager::with_default_setup(),
|
||||
DummyCatalogManager::arc(),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
@@ -415,6 +417,9 @@ impl DatanodeBuilder {
|
||||
for engine in engines {
|
||||
region_server.register_engine(engine);
|
||||
}
|
||||
if let Some(topic_stats_reporter) = self.topic_stats_reporter.take() {
|
||||
region_server.set_topic_stats_reporter(topic_stats_reporter);
|
||||
}
|
||||
|
||||
Ok(region_server)
|
||||
}
|
||||
@@ -528,10 +533,13 @@ impl DatanodeBuilder {
|
||||
None
|
||||
};
|
||||
|
||||
let log_store =
|
||||
Self::build_kafka_log_store(kafka_config, global_index_collector).await?;
|
||||
self.topic_stats_reporter = Some(log_store.topic_stats_reporter());
|
||||
let builder = MitoEngineBuilder::new(
|
||||
&opts.storage.data_home,
|
||||
config,
|
||||
Self::build_kafka_log_store(kafka_config, global_index_collector).await?,
|
||||
log_store,
|
||||
object_store_manager,
|
||||
schema_metadata_manager,
|
||||
plugins,
|
||||
@@ -669,16 +677,24 @@ async fn open_all_regions(
|
||||
ignore_nonexistent_region,
|
||||
)
|
||||
.await?;
|
||||
ensure!(
|
||||
open_regions.len() == num_regions,
|
||||
error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"Expected to open {} of regions, only {} of regions has opened",
|
||||
num_regions,
|
||||
open_regions.len()
|
||||
)
|
||||
}
|
||||
);
|
||||
if !ignore_nonexistent_region {
|
||||
ensure!(
|
||||
open_regions.len() == num_regions,
|
||||
error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"Expected to open {} of regions, only {} of regions has opened",
|
||||
num_regions,
|
||||
open_regions.len()
|
||||
)
|
||||
}
|
||||
);
|
||||
} else if open_regions.len() != num_regions {
|
||||
warn!(
|
||||
"ignore nonexistent region, expected to open {} of regions, only {} of regions has opened",
|
||||
num_regions,
|
||||
open_regions.len()
|
||||
);
|
||||
}
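Not part of the diff: the new open-count check reduced to a standalone function, with `eprintln!` standing in for the crate's `warn!` and a plain `String` error standing in for `error::UnexpectedSnafu`.

fn check_opened(opened: usize, expected: usize, ignore_nonexistent_region: bool) -> Result<(), String> {
    if opened == expected {
        return Ok(());
    }
    if ignore_nonexistent_region {
        // Tolerate missing regions, but keep a trace of the mismatch.
        eprintln!("ignore nonexistent region, expected to open {expected} regions, only {opened} opened");
        Ok(())
    } else {
        Err(format!("Expected to open {expected} regions, only {opened} opened"))
    }
}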
|
||||
|
||||
for region_id in open_regions {
|
||||
if open_with_writable {
|
||||
@@ -697,6 +713,7 @@ async fn open_all_regions(
|
||||
follower_regions.len()
|
||||
);
|
||||
let mut region_requests = Vec::with_capacity(follower_regions.len());
|
||||
let num_regions = follower_regions.len();
|
||||
for (region_id, engine, store_path, options) in follower_regions {
|
||||
let table_dir = table_dir(&store_path, region_id.table_id());
|
||||
region_requests.push((
|
||||
@@ -719,16 +736,24 @@ async fn open_all_regions(
|
||||
)
|
||||
.await?;
|
||||
|
||||
ensure!(
|
||||
open_regions.len() == num_regions,
|
||||
error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"Expected to open {} of follower regions, only {} of regions has opened",
|
||||
num_regions,
|
||||
open_regions.len()
|
||||
)
|
||||
}
|
||||
);
|
||||
if !ignore_nonexistent_region {
|
||||
ensure!(
|
||||
open_regions.len() == num_regions,
|
||||
error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"Expected to open {} of follower regions, only {} of regions has opened",
|
||||
num_regions,
|
||||
open_regions.len()
|
||||
)
|
||||
}
|
||||
);
|
||||
} else if open_regions.len() != num_regions {
|
||||
warn!(
|
||||
"ignore nonexistent region, expected to open {} of follower regions, only {} of regions has opened",
|
||||
num_regions,
|
||||
open_regions.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
info!("all regions are opened");
|
||||
|
||||
@@ -79,13 +79,6 @@ pub enum Error {
|
||||
source: common_query::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Catalog not found: {}", name))]
|
||||
CatalogNotFound {
|
||||
name: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Schema not found: {}", name))]
|
||||
SchemaNotFound {
|
||||
name: String,
|
||||
@@ -159,13 +152,6 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to access catalog"))]
|
||||
Catalog {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: catalog::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to initialize meta client"))]
|
||||
MetaClientInit {
|
||||
#[snafu(implicit)]
|
||||
@@ -429,12 +415,10 @@ impl ErrorExt for Error {
|
||||
InvalidSql { .. }
|
||||
| IllegalPrimaryKeysDef { .. }
|
||||
| MissingTimestampColumn { .. }
|
||||
| CatalogNotFound { .. }
|
||||
| SchemaNotFound { .. }
|
||||
| SchemaExists { .. }
|
||||
| MissingNodeId { .. }
|
||||
| ColumnNoneDefaultValue { .. }
|
||||
| Catalog { .. }
|
||||
| MissingRequiredField { .. }
|
||||
| RegionEngineNotFound { .. }
|
||||
| ParseAddr { .. }
|
||||
|
||||
@@ -279,10 +279,12 @@ impl HeartbeatTask {
|
||||
}
|
||||
_ = &mut sleep => {
|
||||
let region_stats = Self::load_region_stats(®ion_server_clone);
|
||||
let topic_stats = region_server_clone.topic_stats();
|
||||
let now = Instant::now();
|
||||
let duration_since_epoch = (now - epoch).as_millis() as u64;
|
||||
let req = HeartbeatRequest {
|
||||
region_stats,
|
||||
topic_stats,
|
||||
duration_since_epoch,
|
||||
..heartbeat_request.clone()
|
||||
};
|
||||
|
||||
@@ -50,6 +50,15 @@ impl HandlerContext {
|
||||
error: None,
|
||||
}))
|
||||
}
|
||||
Ok(SetRegionRoleStateResponse::InvalidTransition(err)) => {
|
||||
error!(err; "Failed to convert region to follower - invalid transition");
|
||||
Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
last_entry_id: None,
|
||||
metadata_last_entry_id: None,
|
||||
exists: true,
|
||||
error: Some(format!("{err:?}")),
|
||||
}))
|
||||
}
|
||||
Err(err) => {
|
||||
error!(err; "Failed to convert region to {}", SettableRegionRoleState::Follower);
|
||||
Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
@@ -117,6 +126,15 @@ impl HandlerContext {
|
||||
error: None,
|
||||
}));
|
||||
}
|
||||
Ok(SetRegionRoleStateResponse::InvalidTransition(err)) => {
|
||||
error!(err; "Failed to convert region to downgrading leader - invalid transition");
|
||||
return Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
last_entry_id: None,
|
||||
metadata_last_entry_id: None,
|
||||
exists: true,
|
||||
error: Some(format!("{err:?}")),
|
||||
}));
|
||||
}
|
||||
Err(err) => {
|
||||
error!(err; "Failed to convert region to downgrading leader");
|
||||
return Some(InstructionReply::DowngradeRegion(DowngradeRegionReply {
|
||||
|
||||
@@ -12,8 +12,10 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::time::Instant;
|
||||
|
||||
use common_meta::instruction::{FlushRegions, InstructionReply, SimpleReply};
|
||||
use common_telemetry::warn;
|
||||
use common_telemetry::{info, warn};
|
||||
use futures_util::future::BoxFuture;
|
||||
use store_api::region_request::{RegionFlushRequest, RegionRequest};
|
||||
use store_api::storage::RegionId;
|
||||
@@ -27,26 +29,38 @@ impl HandlerContext {
|
||||
flush_regions: FlushRegions,
|
||||
) -> BoxFuture<'static, Option<InstructionReply>> {
|
||||
Box::pin(async move {
|
||||
for region_id in flush_regions.region_ids {
|
||||
let start_time = Instant::now();
|
||||
for region_id in &flush_regions.region_ids {
|
||||
let request = RegionRequest::Flush(RegionFlushRequest {
|
||||
row_group_size: None,
|
||||
});
|
||||
let result = self.region_server.handle_request(region_id, request).await;
|
||||
let now = Instant::now();
|
||||
let result = self.region_server.handle_request(*region_id, request).await;
|
||||
let elapsed = now.elapsed();
|
||||
info!("Flush region: {}, elapsed: {:?}", region_id, elapsed);
|
||||
|
||||
match result {
|
||||
Ok(_) => {}
|
||||
Err(error::Error::RegionNotFound { .. }) => {
|
||||
warn!("Received a flush region instruction from meta, but target region: {region_id} is not found.");
|
||||
warn!(
"Received a flush region instruction from meta, but target region: {} is not found, elapsed: {:?}",
|
||||
region_id,
|
||||
elapsed
|
||||
);
|
||||
}
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"Failed to flush region: {region_id}, error: {err}",
|
||||
region_id = region_id,
|
||||
err = err,
|
||||
"Failed to flush region: {}, error: {}, elapsed: {:?}",
|
||||
region_id, err, elapsed
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
let elapsed = start_time.elapsed();
|
||||
info!(
|
||||
"Flush regions: {:?}, elapsed: {:?}",
|
||||
flush_regions.region_ids, elapsed
|
||||
);
|
||||
None
|
||||
})
|
||||
}
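Not part of the diff: the timing pattern added in this hunk, in isolation (std only); the label and closure are illustrative.

use std::time::Instant;

fn timed<T>(label: &str, f: impl FnOnce() -> T) -> T {
    let start = Instant::now();
    let out = f();
    // Mirrors the per-region and whole-batch `elapsed` logging added above.
    eprintln!("{label}: elapsed {:?}", start.elapsed());
    out
}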
|
||||
|
||||
@@ -19,6 +19,7 @@ use std::sync::{Arc, RwLock};
|
||||
use std::time::Duration;
|
||||
|
||||
use api::region::RegionResponse;
|
||||
use api::v1::meta::TopicStat;
|
||||
use api::v1::region::sync_request::ManifestInfo;
|
||||
use api::v1::region::{
|
||||
region_request, ListMetadataRequest, RegionResponse as RegionResponseV1, SyncRequest,
|
||||
@@ -29,6 +30,7 @@ use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_meta::datanode::TopicStatsReporter;
|
||||
use common_query::request::QueryRequest;
|
||||
use common_query::OutputData;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
@@ -135,10 +137,16 @@ impl RegionServer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers an engine.
|
||||
pub fn register_engine(&mut self, engine: RegionEngineRef) {
|
||||
self.inner.register_engine(engine);
|
||||
}
|
||||
|
||||
/// Sets the topic stats reporter.
|
||||
pub fn set_topic_stats_reporter(&mut self, topic_stats_reporter: Box<dyn TopicStatsReporter>) {
|
||||
self.inner.set_topic_stats_reporter(topic_stats_reporter);
|
||||
}
|
||||
|
||||
/// Finds the region's engine by its id. If the region is not ready, returns `None`.
|
||||
pub fn find_engine(&self, region_id: RegionId) -> Result<Option<RegionEngineRef>> {
|
||||
match self.inner.get_engine(region_id, &RegionChange::None) {
|
||||
@@ -248,7 +256,7 @@ impl RegionServer {
|
||||
None
|
||||
};
|
||||
|
||||
let ctx: Option<session::context::QueryContext> = request.header.as_ref().map(|h| h.into());
|
||||
let ctx = request.header.as_ref().map(|h| h.into());
|
||||
let query_ctx = Arc::new(ctx.unwrap_or_else(|| QueryContextBuilder::default().build()));
|
||||
|
||||
let provider = self
|
||||
@@ -305,6 +313,24 @@ impl RegionServer {
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Returns the reportable topics.
|
||||
pub fn topic_stats(&self) -> Vec<TopicStat> {
|
||||
let mut reporter = self.inner.topic_stats_reporter.write().unwrap();
|
||||
let Some(reporter) = reporter.as_mut() else {
|
||||
return vec![];
|
||||
};
|
||||
reporter
|
||||
.reportable_topics()
|
||||
.into_iter()
|
||||
.map(|stat| TopicStat {
|
||||
topic_name: stat.topic,
|
||||
record_size: stat.record_size,
|
||||
record_num: stat.record_num,
|
||||
latest_entry_id: stat.latest_entry_id,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn is_region_leader(&self, region_id: RegionId) -> Option<bool> {
|
||||
self.inner.region_map.get(®ion_id).and_then(|engine| {
|
||||
engine.role(region_id).map(|role| match role {
|
||||
@@ -669,6 +695,8 @@ struct RegionServerInner {
|
||||
// The number of queries allowed to be executed at the same time.
|
||||
// Act as last line of defense on datanode to prevent query overloading.
|
||||
parallelism: Option<RegionServerParallelism>,
|
||||
// The topic stats reporter.
|
||||
topic_stats_reporter: RwLock<Option<Box<dyn TopicStatsReporter>>>,
|
||||
}
|
||||
|
||||
struct RegionServerParallelism {
|
||||
@@ -734,6 +762,7 @@ impl RegionServerInner {
|
||||
event_listener,
|
||||
table_provider_factory,
|
||||
parallelism,
|
||||
topic_stats_reporter: RwLock::new(None),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -746,6 +775,11 @@ impl RegionServerInner {
|
||||
.insert(engine_name.to_string(), engine);
|
||||
}
|
||||
|
||||
pub fn set_topic_stats_reporter(&self, topic_stats_reporter: Box<dyn TopicStatsReporter>) {
|
||||
info!("Set topic stats reporter");
|
||||
*self.topic_stats_reporter.write().unwrap() = Some(topic_stats_reporter);
|
||||
}
|
||||
|
||||
fn get_engine(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
|
||||
@@ -143,6 +143,7 @@ define_timestamp_with_unit!(Millisecond);
|
||||
define_timestamp_with_unit!(Microsecond);
|
||||
define_timestamp_with_unit!(Nanosecond);
|
||||
|
||||
/// Converts a timestamp array to a primitive array and the time unit.
|
||||
pub fn timestamp_array_to_primitive(
|
||||
ts_array: &ArrayRef,
|
||||
) -> Option<(
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use session::ReadPreference;
|
||||
|
||||
pub(crate) mod engine;
|
||||
pub(crate) mod frontend_client;
|
||||
@@ -54,6 +55,8 @@ pub struct BatchingModeOptions {
|
||||
pub experimental_max_filter_num_per_query: usize,
|
||||
/// Time window merge distance
|
||||
pub experimental_time_window_merge_threshold: usize,
|
||||
/// Read preference of the Frontend client.
|
||||
pub read_preference: ReadPreference,
|
||||
}
|
||||
|
||||
impl Default for BatchingModeOptions {
|
||||
@@ -68,6 +71,7 @@ impl Default for BatchingModeOptions {
|
||||
experimental_frontend_activity_timeout: Duration::from_secs(60),
|
||||
experimental_max_filter_num_per_query: 20,
|
||||
experimental_time_window_merge_threshold: 3,
|
||||
read_preference: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,6 +36,7 @@ use rand::rng;
|
||||
use rand::seq::SliceRandom;
|
||||
use servers::query_handler::grpc::GrpcQueryHandler;
|
||||
use session::context::{QueryContextBuilder, QueryContextRef};
|
||||
use session::hints::READ_PREFERENCE_HINT;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::batching_mode::BatchingModeOptions;
|
||||
@@ -363,7 +364,10 @@ impl FrontendClient {
|
||||
.handle_with_retry(
|
||||
req.clone(),
|
||||
batch_opts.experimental_grpc_max_retries,
|
||||
&[(QUERY_PARALLELISM_HINT, &query.parallelism.to_string())],
|
||||
&[
|
||||
(QUERY_PARALLELISM_HINT, &query.parallelism.to_string()),
|
||||
(READ_PREFERENCE_HINT, batch_opts.read_preference.as_ref()),
|
||||
],
|
||||
)
|
||||
.await
|
||||
.with_context(|_| InvalidRequestSnafu {
|
||||
|
||||
@@ -218,7 +218,6 @@ impl HeartbeatTask {
|
||||
if let Some(message) = message {
|
||||
Self::new_heartbeat_request(&heartbeat_request, Some(message), &latest_report)
|
||||
} else {
|
||||
warn!("Sender has been dropped, exiting the heartbeat loop");
|
||||
// Receives None that means Sender was dropped, we need to break the current loop
|
||||
break
|
||||
}
|
||||
@@ -260,11 +259,7 @@ impl HeartbeatTask {
|
||||
error!(e; "Error while handling heartbeat response");
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
warn!("Heartbeat response stream closed");
|
||||
capture_self.start_with_retry(retry_interval).await;
|
||||
break;
|
||||
}
|
||||
Ok(None) => break,
|
||||
Err(e) => {
|
||||
error!(e; "Occur error while reading heartbeat response");
|
||||
capture_self.start_with_retry(retry_interval).await;
|
||||
|
||||
@@ -71,6 +71,6 @@ pub struct LinearStagePlan {
|
||||
/// The key expressions to use for the lookup relation.
|
||||
pub lookup_key: Vec<ScalarExpr>,
|
||||
/// The closure to apply to the concatenation of the key columns,
|
||||
/// the stream value columns, and the lookup value columns.
|
||||
/// the stream value columns, and the lookup value colunms.
|
||||
pub closure: JoinFilter,
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@ common-catalog.workspace = true
|
||||
common-config.workspace = true
|
||||
common-datasource.workspace = true
|
||||
common-error.workspace = true
|
||||
common-event-recorder.workspace = true
|
||||
common-frontend.workspace = true
|
||||
common-function.workspace = true
|
||||
common-grpc.workspace = true
|
||||
@@ -45,6 +46,7 @@ datafusion-expr.workspace = true
|
||||
datanode.workspace = true
|
||||
datatypes.workspace = true
|
||||
futures.workspace = true
|
||||
humantime.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
lazy_static.workspace = true
|
||||
log-query.workspace = true
|
||||
|
||||
src/frontend/src/events.rs (new file)
@@ -0,0 +1,100 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_PRIVATE_SCHEMA_NAME};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_event_recorder::error::{InsertEventsSnafu, Result};
|
||||
use common_event_recorder::{
|
||||
build_row_inserts_request, group_events_by_type, Event, EventHandler, EventHandlerOptions,
|
||||
};
|
||||
use common_frontend::slow_query_event::SLOW_QUERY_EVENT_TYPE;
|
||||
use humantime::format_duration;
|
||||
use operator::insert::InserterRef;
|
||||
use operator::statement::StatementExecutorRef;
|
||||
use session::context::QueryContextBuilder;
|
||||
use snafu::ResultExt;
|
||||
use store_api::mito_engine_options::{APPEND_MODE_KEY, TTL_KEY};
|
||||
|
||||
/// EventHandlerImpl is the default event handler implementation in frontend.
|
||||
pub struct EventHandlerImpl {
|
||||
inserter: InserterRef,
|
||||
statement_executor: StatementExecutorRef,
|
||||
slow_query_ttl: Duration,
|
||||
global_ttl: Duration,
|
||||
}
|
||||
|
||||
impl EventHandlerImpl {
|
||||
/// Create a new EventHandlerImpl.
|
||||
pub fn new(
|
||||
inserter: InserterRef,
|
||||
statement_executor: StatementExecutorRef,
|
||||
slow_query_ttl: Duration,
|
||||
global_ttl: Duration,
|
||||
) -> Self {
|
||||
Self {
|
||||
inserter,
|
||||
statement_executor,
|
||||
slow_query_ttl,
|
||||
global_ttl,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl EventHandler for EventHandlerImpl {
|
||||
async fn handle(&self, events: &[Box<dyn Event>]) -> Result<()> {
|
||||
let event_groups = group_events_by_type(events);
|
||||
|
||||
for (event_type, events) in event_groups {
|
||||
let opts = self.options(event_type);
|
||||
let query_ctx = QueryContextBuilder::default()
|
||||
.current_catalog(DEFAULT_CATALOG_NAME.to_string())
|
||||
.current_schema(DEFAULT_PRIVATE_SCHEMA_NAME.to_string())
|
||||
.set_extension(TTL_KEY.to_string(), format_duration(opts.ttl).to_string())
|
||||
.set_extension(APPEND_MODE_KEY.to_string(), opts.append_mode.to_string())
|
||||
.build()
|
||||
.into();
|
||||
|
||||
self.inserter
|
||||
.handle_row_inserts(
|
||||
build_row_inserts_request(&events)?,
|
||||
query_ctx,
|
||||
&self.statement_executor,
|
||||
false,
|
||||
false,
|
||||
)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(InsertEventsSnafu)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn options(&self, event_type: &str) -> EventHandlerOptions {
|
||||
match event_type {
|
||||
SLOW_QUERY_EVENT_TYPE => EventHandlerOptions {
|
||||
ttl: self.slow_query_ttl,
|
||||
append_mode: true,
|
||||
},
|
||||
_ => EventHandlerOptions {
|
||||
ttl: self.global_ttl,
|
||||
append_mode: true,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
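Not part of the diff: a hedged sketch of constructing the handler above. `InserterRef` and `StatementExecutorRef` are the `operator` types already imported in this file; the concrete TTL values are illustrative.

fn build_event_handler(
    inserter: InserterRef,
    statement_executor: StatementExecutorRef,
) -> EventHandlerImpl {
    EventHandlerImpl::new(
        inserter,
        statement_executor,
        Duration::from_secs(90 * 24 * 60 * 60), // TTL applied to slow-query events
        Duration::from_secs(30 * 24 * 60 * 60), // TTL applied to every other event type
    )
}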
|
||||
@@ -16,6 +16,7 @@ use std::sync::Arc;
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_config::config::Configurable;
|
||||
use common_event_recorder::EventRecorderOptions;
|
||||
use common_options::datanode::DatanodeClientOptions;
|
||||
use common_options::memory::MemoryOptions;
|
||||
use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, TracingOptions};
|
||||
@@ -62,8 +63,10 @@ pub struct FrontendOptions {
|
||||
pub tracing: TracingOptions,
|
||||
pub query: QueryOptions,
|
||||
pub max_in_flight_write_bytes: Option<ReadableSize>,
|
||||
pub slow_query: Option<SlowQueryOptions>,
|
||||
pub slow_query: SlowQueryOptions,
|
||||
pub memory: MemoryOptions,
|
||||
/// The event recorder options.
|
||||
pub event_recorder: EventRecorderOptions,
|
||||
}
|
||||
|
||||
impl Default for FrontendOptions {
|
||||
@@ -89,8 +92,9 @@ impl Default for FrontendOptions {
|
||||
tracing: TracingOptions::default(),
|
||||
query: QueryOptions::default(),
|
||||
max_in_flight_write_bytes: None,
|
||||
slow_query: Some(SlowQueryOptions::default()),
|
||||
slow_query: SlowQueryOptions::default(),
|
||||
memory: MemoryOptions::default(),
|
||||
event_recorder: EventRecorderOptions::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ use common_meta::heartbeat::handler::{
|
||||
};
|
||||
use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MailboxRef, OutgoingMessage};
|
||||
use common_meta::heartbeat::utils::outgoing_message_to_mailbox_message;
|
||||
use common_telemetry::{debug, error, info, warn};
|
||||
use common_telemetry::{debug, error, info};
|
||||
use meta_client::client::{HeartbeatSender, HeartbeatStream, MetaClient};
|
||||
use servers::addrs;
|
||||
use servers::heartbeat_options::HeartbeatOptions;
|
||||
@@ -42,8 +42,8 @@ use crate::metrics::{HEARTBEAT_RECV_COUNT, HEARTBEAT_SENT_COUNT};
|
||||
pub struct HeartbeatTask {
|
||||
peer_addr: String,
|
||||
meta_client: Arc<MetaClient>,
|
||||
report_interval: Duration,
|
||||
retry_interval: Duration,
|
||||
report_interval: u64,
|
||||
retry_interval: u64,
|
||||
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
|
||||
start_time_ms: u64,
|
||||
}
|
||||
@@ -58,8 +58,8 @@ impl HeartbeatTask {
|
||||
HeartbeatTask {
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
|
||||
meta_client,
|
||||
report_interval: heartbeat_opts.interval,
|
||||
retry_interval: heartbeat_opts.retry_interval,
|
||||
report_interval: heartbeat_opts.interval.as_millis() as u64,
|
||||
retry_interval: heartbeat_opts.retry_interval.as_millis() as u64,
|
||||
resp_handler_executor,
|
||||
start_time_ms: common_time::util::current_time_millis() as u64,
|
||||
}
|
||||
@@ -103,15 +103,13 @@ impl HeartbeatTask {
|
||||
HEARTBEAT_RECV_COUNT.with_label_values(&["success"]).inc();
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
warn!("Heartbeat response stream closed");
|
||||
capture_self.start_with_retry(retry_interval).await;
|
||||
break;
|
||||
}
|
||||
Ok(None) => break,
|
||||
Err(e) => {
|
||||
HEARTBEAT_RECV_COUNT.with_label_values(&["error"]).inc();
|
||||
error!(e; "Occur error while reading heartbeat response");
|
||||
capture_self.start_with_retry(retry_interval).await;
|
||||
capture_self
|
||||
.start_with_retry(Duration::from_millis(retry_interval))
|
||||
.await;
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -179,13 +177,12 @@ impl HeartbeatTask {
|
||||
if let Some(message) = message {
|
||||
Self::new_heartbeat_request(&heartbeat_request, Some(message))
|
||||
} else {
|
||||
warn!("Sender has been dropped, exiting the heartbeat loop");
|
||||
// Receives None that means Sender was dropped, we need to break the current loop
|
||||
break
|
||||
}
|
||||
}
|
||||
_ = &mut sleep => {
|
||||
sleep.as_mut().reset(Instant::now() + report_interval);
|
||||
sleep.as_mut().reset(Instant::now() + Duration::from_millis(report_interval));
|
||||
Self::new_heartbeat_request(&heartbeat_request, None)
|
||||
}
|
||||
};
|
||||
|
||||
@@ -32,13 +32,16 @@ use std::time::{Duration, SystemTime};
|
||||
use async_stream::stream;
|
||||
use async_trait::async_trait;
|
||||
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
|
||||
use catalog::process_manager::{ProcessManagerRef, QueryStatement as CatalogQueryStatement};
|
||||
use catalog::process_manager::{
|
||||
ProcessManagerRef, QueryStatement as CatalogQueryStatement, SlowQueryTimer,
|
||||
};
|
||||
use catalog::CatalogManagerRef;
|
||||
use client::OutputData;
|
||||
use common_base::cancellation::CancellableFuture;
|
||||
use common_base::Plugins;
|
||||
use common_config::KvBackendConfig;
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_event_recorder::EventRecorderRef;
|
||||
use common_meta::cache_invalidator::CacheInvalidatorRef;
|
||||
use common_meta::key::runtime_switch::RuntimeSwitchManager;
|
||||
use common_meta::key::table_name::TableNameKey;
|
||||
@@ -53,6 +56,7 @@ use common_procedure::ProcedureManagerRef;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::error::StreamTimeoutSnafu;
|
||||
use common_recordbatch::RecordBatchStreamWrapper;
|
||||
use common_telemetry::logging::SlowQueryOptions;
|
||||
use common_telemetry::{debug, error, info, tracing};
|
||||
use dashmap::DashMap;
|
||||
use datafusion_expr::LogicalPlan;
|
||||
@@ -99,7 +103,6 @@ use crate::error::{
|
||||
StatementTimeoutSnafu, TableOperationSnafu,
|
||||
};
|
||||
use crate::limiter::LimiterRef;
|
||||
use crate::slow_query_recorder::SlowQueryRecorder;
|
||||
use crate::stream_wrapper::CancellableStreamWrapper;
|
||||
|
||||
lazy_static! {
|
||||
@@ -119,9 +122,10 @@ pub struct Instance {
|
||||
inserter: InserterRef,
|
||||
deleter: DeleterRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
slow_query_recorder: Option<SlowQueryRecorder>,
|
||||
event_recorder: Option<EventRecorderRef>,
|
||||
limiter: Option<LimiterRef>,
|
||||
process_manager: ProcessManagerRef,
|
||||
slow_query_options: SlowQueryOptions,
|
||||
|
||||
// cache for otlp metrics
|
||||
// first layer key: db-string
|
||||
@@ -222,9 +226,20 @@ impl Instance {
|
||||
let query_interceptor = query_interceptor.as_ref();
|
||||
|
||||
if should_capture_statement(Some(&stmt)) {
|
||||
let slow_query_timer = self.slow_query_recorder.as_ref().and_then(|recorder| {
|
||||
recorder.start(CatalogQueryStatement::Sql(stmt.clone()), query_ctx.clone())
|
||||
});
|
||||
let slow_query_timer = self
|
||||
.slow_query_options
|
||||
.enable
|
||||
.then(|| self.event_recorder.clone())
|
||||
.flatten()
|
||||
.map(|event_recorder| {
|
||||
SlowQueryTimer::new(
|
||||
CatalogQueryStatement::Sql(stmt.clone()),
|
||||
self.slow_query_options.threshold,
|
||||
self.slow_query_options.sample_ratio,
|
||||
self.slow_query_options.record_type,
|
||||
event_recorder,
|
||||
)
|
||||
});
|
||||
|
||||
let ticket = self.process_manager.register_query(
|
||||
query_ctx.current_catalog().to_string(),
|
||||
@@ -586,9 +601,20 @@ impl SqlQueryHandler for Instance {
|
||||
// It's safe to unwrap here because we've already checked the type.
|
||||
let stmt = stmt.unwrap();
|
||||
let query = stmt.to_string();
|
||||
let slow_query_timer = self.slow_query_recorder.as_ref().and_then(|recorder| {
|
||||
recorder.start(CatalogQueryStatement::Sql(stmt), query_ctx.clone())
|
||||
});
|
||||
let slow_query_timer = self
|
||||
.slow_query_options
|
||||
.enable
|
||||
.then(|| self.event_recorder.clone())
|
||||
.flatten()
|
||||
.map(|event_recorder| {
|
||||
SlowQueryTimer::new(
|
||||
CatalogQueryStatement::Sql(stmt.clone()),
|
||||
self.slow_query_options.threshold,
|
||||
self.slow_query_options.sample_ratio,
|
||||
self.slow_query_options.record_type,
|
||||
event_recorder,
|
||||
)
|
||||
});
|
||||
|
||||
let ticket = self.process_manager.register_query(
|
||||
query_ctx.current_catalog().to_string(),
|
||||
@@ -739,9 +765,19 @@ impl PrometheusHandler for Instance {
|
||||
let query = query_statement.to_string();
|
||||
|
||||
let slow_query_timer = self
|
||||
.slow_query_recorder
|
||||
.as_ref()
|
||||
.and_then(|recorder| recorder.start(query_statement, query_ctx.clone()));
|
||||
.slow_query_options
|
||||
.enable
|
||||
.then(|| self.event_recorder.clone())
|
||||
.flatten()
|
||||
.map(|event_recorder| {
|
||||
SlowQueryTimer::new(
|
||||
query_statement,
|
||||
self.slow_query_options.threshold,
|
||||
self.slow_query_options.sample_ratio,
|
||||
self.slow_query_options.record_type,
|
||||
event_recorder,
|
||||
)
|
||||
});
|
||||
|
||||
let ticket = self.process_manager.register_query(
|
||||
query_ctx.current_catalog().to_string(),
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.