Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2026-01-06 05:12:54 +00:00)

Compare commits: 44 commits, v0.17.0-ni...v0.17.1
| Author | SHA1 | Date |
|---|---|---|
|  | 38456638f8 |  |
|  | 97c0b1f5c1 |  |
|  | 4fc7f12360 |  |
|  | ed17997449 |  |
|  | 849ae8ebb6 |  |
|  | a0587e2e87 |  |
|  | 1ed71169ac |  |
|  | e62f0e2b64 |  |
|  | f92e753a34 |  |
|  | a22b016f90 |  |
|  | 7a9fa99069 |  |
|  | d808e7be7e |  |
|  | 8e22fcfd5c |  |
|  | 26729c31a6 |  |
|  | b73617eaba |  |
|  | 3b909f63e3 |  |
|  | 0d4e07eddd |  |
|  | b94ce9019d |  |
|  | 3dcd40c4ba |  |
|  | a67803d0e9 |  |
|  | aa7e7942f8 |  |
|  | f1b7581dc3 |  |
|  | cd761df369 |  |
|  | 0cea6ae64d |  |
|  | 8bf772fb50 |  |
|  | 9c1240921d |  |
|  | eb52129a91 |  |
|  | a0a2b40cbe |  |
|  | 067c4458d6 |  |
|  | 4e9c31bf5c |  |
|  | 9320a6ddaa |  |
|  | 4c9fcb7dee |  |
|  | 9dc16772fe |  |
|  | 6ee91f6af4 |  |
|  | 9175fa643d |  |
|  | 0e962844ac |  |
|  | 246b832d79 |  |
|  | e62a022d76 |  |
|  | e595885dc6 |  |
|  | dd3432e6ca |  |
|  | ab96703d8f |  |
|  | 73add808a6 |  |
|  | 1234911ed3 |  |
|  | d57c0db9e6 |  |

@@ -25,8 +25,8 @@ runs:
--set persistence.size=2Gi \
--create-namespace \
--set global.security.allowInsecureImages=true \
--set image.registry=public.ecr.aws/i8k6a5e1 \
--set image.repository=bitnami/etcd \
--set image.registry=docker.io \
--set image.repository=greptime/etcd \
--set image.tag=3.6.1-debian-12-r3 \
--version 12.0.8 \
-n ${{ inputs.namespace }}
@@ -1,3 +1,8 @@
logging:
level: "info"
format: "json"
filters:
- log_store=debug
meta:
configData: |-
[runtime]
@@ -23,8 +23,8 @@ runs:
--set listeners.controller.protocol=PLAINTEXT \
--set listeners.client.protocol=PLAINTEXT \
--create-namespace \
--set image.registry=public.ecr.aws/i8k6a5e1 \
--set image.repository=bitnami/kafka \
--set image.registry=docker.io \
--set image.repository=greptime/kafka \
--set image.tag=3.9.0-debian-12-r1 \
--version 31.0.0 \
-n ${{ inputs.namespace }}
@@ -21,8 +21,8 @@ runs:
--install postgresql oci://registry-1.docker.io/bitnamicharts/postgresql \
--set replicaCount=${{ inputs.postgres-replicas }} \
--set global.security.allowInsecureImages=true \
--set image.registry=public.ecr.aws/i8k6a5e1 \
--set image.repository=bitnami/postgresql \
--set image.registry=docker.io \
--set image.repository=greptime/postgresql \
--set image.tag=17.5.0-debian-12-r3 \
--version 16.7.4 \
--set persistence.size=${{ inputs.storage-size }} \

.github/scripts/deploy-greptimedb.sh (vendored): 40 lines changed
@@ -3,12 +3,14 @@
set -e
set -o pipefail

KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.24.0}"
KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.32.0}"
ENABLE_STANDALONE_MODE="${ENABLE_STANDALONE_MODE:-true}"
DEFAULT_INSTALL_NAMESPACE=${DEFAULT_INSTALL_NAMESPACE:-default}
GREPTIMEDB_IMAGE_TAG=${GREPTIMEDB_IMAGE_TAG:-latest}
ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
GREPTIME_CHART="https://greptimeteam.github.io/helm-charts/"
ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
ETCD_CHART_VERSION="${ETCD_CHART_VERSION:-12.0.8}"
ETCD_IMAGE_TAG="${ETCD_IMAGE_TAG:-3.6.1-debian-12-r3}"

# Create a cluster with 1 control-plane node and 5 workers.
function create_kind_cluster() {
@@ -35,10 +37,16 @@ function add_greptime_chart() {
function deploy_etcd_cluster() {
local namespace="$1"

helm install etcd "$ETCD_CHART" \
helm upgrade --install etcd "$ETCD_CHART" \
--version "$ETCD_CHART_VERSION" \
--create-namespace \
--set replicaCount=3 \
--set auth.rbac.create=false \
--set auth.rbac.token.enabled=false \
--set global.security.allowInsecureImages=true \
--set image.registry=docker.io \
--set image.repository=greptime/etcd \
--set image.tag="$ETCD_IMAGE_TAG" \
-n "$namespace"

# Wait for etcd cluster to be ready.
@@ -48,7 +56,8 @@ function deploy_etcd_cluster() {
# Deploy greptimedb-operator.
function deploy_greptimedb_operator() {
# Use the latest chart and image.
helm install greptimedb-operator greptime/greptimedb-operator \
helm upgrade --install greptimedb-operator greptime/greptimedb-operator \
--create-namespace \
--set image.tag=latest \
-n "$DEFAULT_INSTALL_NAMESPACE"

@@ -66,9 +75,11 @@ function deploy_greptimedb_cluster() {

deploy_etcd_cluster "$install_namespace"

helm install "$cluster_name" greptime/greptimedb-cluster \
helm upgrade --install "$cluster_name" greptime/greptimedb-cluster \
--create-namespace \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
--set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
--set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
-n "$install_namespace"

# Wait for greptimedb cluster to be ready.
@@ -101,15 +112,17 @@ function deploy_greptimedb_cluster_with_s3_storage() {

deploy_etcd_cluster "$install_namespace"

helm install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
helm upgrade --install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
--create-namespace \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
--set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
--set storage.s3.bucket="$AWS_CI_TEST_BUCKET" \
--set storage.s3.region="$AWS_REGION" \
--set storage.s3.root="$DATA_ROOT" \
--set storage.credentials.secretName=s3-credentials \
--set storage.credentials.accessKeyId="$AWS_ACCESS_KEY_ID" \
--set storage.credentials.secretAccessKey="$AWS_SECRET_ACCESS_KEY"
--set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
--set objectStorage.s3.bucket="$AWS_CI_TEST_BUCKET" \
--set objectStorage.s3.region="$AWS_REGION" \
--set objectStorage.s3.root="$DATA_ROOT" \
--set objectStorage.credentials.secretName=s3-credentials \
--set objectStorage.credentials.accessKeyId="$AWS_ACCESS_KEY_ID" \
--set objectStorage.credentials.secretAccessKey="$AWS_SECRET_ACCESS_KEY"

# Wait for greptimedb cluster to be ready.
while true; do
@@ -134,7 +147,8 @@ function deploy_greptimedb_cluster_with_s3_storage() {
# Deploy standalone greptimedb.
# It will expose cluster service ports as '34000', '34001', '34002', '34003' to local access.
function deploy_standalone_greptimedb() {
helm install greptimedb-standalone greptime/greptimedb-standalone \
helm upgrade --install greptimedb-standalone greptime/greptimedb-standalone \
--create-namespace \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
-n "$DEFAULT_INSTALL_NAMESPACE"

.github/scripts/pull-test-deps-images.sh (vendored): 10 lines changed
@@ -7,11 +7,11 @@ set -e
MAX_RETRIES=3

IMAGES=(
"public.ecr.aws/i8k6a5e1/bitnami/zookeeper:3.7"
"public.ecr.aws/i8k6a5e1/bitnami/kafka:3.9.0-debian-12-r1"
"public.ecr.aws/i8k6a5e1/bitnami/etcd:3.6.1-debian-12-r3"
"public.ecr.aws/i8k6a5e1/bitnami/minio:2024"
"public.ecr.aws/i8k6a5e1/bitnami/mysql:5.7"
"greptime/zookeeper:3.7"
"greptime/kafka:3.9.0-debian-12-r1"
"greptime/etcd:3.6.1-debian-12-r3"
"greptime/minio:2024"
"greptime/mysql:5.7"
)

for image in "${IMAGES[@]}"; do

.github/workflows/semantic-pull-request.yml (vendored): 6 lines changed
@@ -1,7 +1,7 @@
name: "Semantic Pull Request"

on:
pull_request:
pull_request_target:
types:
- opened
- reopened
@@ -12,9 +12,9 @@ concurrency:
cancel-in-progress: true

permissions:
issues: write
contents: write
contents: read
pull-requests: write
issues: write

jobs:
check:

Cargo.lock (generated): 162 lines changed
@@ -218,7 +218,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"

[[package]]
name = "api"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-base",
"common-decimal",
@@ -737,7 +737,7 @@ dependencies = [

[[package]]
name = "auth"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -1387,7 +1387,7 @@ dependencies = [

[[package]]
name = "cache"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"catalog",
"common-error",
@@ -1422,7 +1422,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"

[[package]]
name = "catalog"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arrow",
@@ -1763,7 +1763,7 @@ checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"

[[package]]
name = "cli"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-stream",
"async-trait",
@@ -1807,7 +1807,7 @@ dependencies = [
"session",
"snafu 0.8.6",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tempfile",
"tokio",
@@ -1816,7 +1816,7 @@ dependencies = [

[[package]]
name = "client"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arc-swap",
@@ -1848,7 +1848,7 @@ dependencies = [
"serde_json",
"snafu 0.8.6",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"substrait 0.37.3",
"tokio",
"tokio-stream",
@@ -1889,7 +1889,7 @@ dependencies = [

[[package]]
name = "cmd"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"auth",
@@ -1951,7 +1951,7 @@ dependencies = [
"snafu 0.8.6",
"stat",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"temp-env",
"tempfile",
@@ -1997,7 +1997,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"

[[package]]
name = "common-base"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"anymap2",
"async-trait",
@@ -2019,11 +2019,11 @@ dependencies = [

[[package]]
name = "common-catalog"
version = "0.17.0"
version = "0.17.1"

[[package]]
name = "common-config"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-base",
"common-error",
@@ -2049,7 +2049,7 @@ dependencies = [

[[package]]
name = "common-datasource"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"arrow",
"arrow-schema",
@@ -2084,7 +2084,7 @@ dependencies = [

[[package]]
name = "common-decimal"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"bigdecimal 0.4.8",
"common-error",
@@ -2097,7 +2097,7 @@ dependencies = [

[[package]]
name = "common-error"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-macro",
"http 1.3.1",
@@ -2108,7 +2108,7 @@ dependencies = [

[[package]]
name = "common-event-recorder"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -2130,7 +2130,7 @@ dependencies = [

[[package]]
name = "common-frontend"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -2152,7 +2152,7 @@ dependencies = [

[[package]]
name = "common-function"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"api",
@@ -2210,7 +2210,7 @@ dependencies = [

[[package]]
name = "common-greptimedb-telemetry"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"common-runtime",
@@ -2227,7 +2227,7 @@ dependencies = [

[[package]]
name = "common-grpc"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arrow-flight",
@@ -2260,7 +2260,7 @@ dependencies = [

[[package]]
name = "common-grpc-expr"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"common-base",
@@ -2280,7 +2280,7 @@ dependencies = [

[[package]]
name = "common-macro"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"greptime-proto",
"once_cell",
@@ -2291,7 +2291,7 @@ dependencies = [

[[package]]
name = "common-mem-prof"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"anyhow",
"common-error",
@@ -2307,7 +2307,7 @@ dependencies = [

[[package]]
name = "common-meta"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"anymap2",
"api",
@@ -2379,7 +2379,7 @@ dependencies = [

[[package]]
name = "common-options"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-grpc",
"humantime-serde",
@@ -2388,11 +2388,11 @@ dependencies = [

[[package]]
name = "common-plugins"
version = "0.17.0"
version = "0.17.1"

[[package]]
name = "common-pprof"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-error",
"common-macro",
@@ -2404,7 +2404,7 @@ dependencies = [

[[package]]
name = "common-procedure"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-stream",
@@ -2433,7 +2433,7 @@ dependencies = [

[[package]]
name = "common-procedure-test"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"common-procedure",
@@ -2443,7 +2443,7 @@ dependencies = [

[[package]]
name = "common-query"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -2468,7 +2468,7 @@ dependencies = [

[[package]]
name = "common-recordbatch"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"arc-swap",
"common-error",
@@ -2489,7 +2489,7 @@ dependencies = [

[[package]]
name = "common-runtime"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"clap 4.5.40",
@@ -2518,7 +2518,7 @@ dependencies = [

[[package]]
name = "common-session"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"serde",
"strum 0.27.1",
@@ -2526,7 +2526,7 @@ dependencies = [

[[package]]
name = "common-sql"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-base",
"common-decimal",
@@ -2544,7 +2544,7 @@ dependencies = [

[[package]]
name = "common-telemetry"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"backtrace",
"common-base",
@@ -2573,7 +2573,7 @@ dependencies = [

[[package]]
name = "common-test-util"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"client",
"common-grpc",
@@ -2586,7 +2586,7 @@ dependencies = [

[[package]]
name = "common-time"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"arrow",
"chrono",
@@ -2604,7 +2604,7 @@ dependencies = [

[[package]]
name = "common-version"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"build-data",
"cargo-manifest",
@@ -2615,7 +2615,7 @@ dependencies = [

[[package]]
name = "common-wal"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-base",
"common-error",
@@ -2638,7 +2638,7 @@ dependencies = [

[[package]]
name = "common-workload"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-telemetry",
"serde",
@@ -3865,7 +3865,7 @@ dependencies = [

[[package]]
name = "datanode"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arrow-flight",
@@ -3918,7 +3918,7 @@ dependencies = [
"session",
"snafu 0.8.6",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tokio",
"toml 0.8.23",
@@ -3928,7 +3928,7 @@ dependencies = [

[[package]]
name = "datatypes"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"arrow",
"arrow-array",
@@ -4602,7 +4602,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"

[[package]]
name = "file-engine"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -4734,7 +4734,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"

[[package]]
name = "flow"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arrow",
@@ -4801,7 +4801,7 @@ dependencies = [
"sql",
"store-api",
"strum 0.27.1",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tokio",
"tonic 0.13.1",
@@ -4856,7 +4856,7 @@ checksum = "28dd6caf6059519a65843af8fe2a3ae298b14b80179855aeb4adc2c1934ee619"

[[package]]
name = "frontend"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arc-swap",
@@ -4919,7 +4919,7 @@ dependencies = [
"sqlparser 0.55.0-greptime",
"store-api",
"strfmt",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tokio",
"tokio-util",
@@ -6061,7 +6061,7 @@ dependencies = [

[[package]]
name = "index"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"asynchronous-codec",
@@ -7001,7 +7001,7 @@ checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"

[[package]]
name = "log-query"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"chrono",
"common-error",
@@ -7013,7 +7013,7 @@ dependencies = [

[[package]]
name = "log-store"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-stream",
"async-trait",
@@ -7284,8 +7284,7 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
name = "memcomparable"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "376101dbd964fc502d5902216e180f92b3d003b5cc3d2e40e044eb5470fca677"
source = "git+https://github.com/v0y4g3r/memcomparable.git?rev=a07122dc03556bbd88ad66234cbea7efd3b23efb#a07122dc03556bbd88ad66234cbea7efd3b23efb"
dependencies = [
"bytes",
"serde",
@@ -7321,7 +7320,7 @@ dependencies = [

[[package]]
name = "meta-client"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -7349,7 +7348,7 @@ dependencies = [

[[package]]
name = "meta-srv"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -7445,7 +7444,7 @@ dependencies = [

[[package]]
name = "metric-engine"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"aquamarine",
@@ -7538,7 +7537,7 @@ dependencies = [

[[package]]
name = "mito-codec"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"bytes",
@@ -7562,7 +7561,7 @@ dependencies = [

[[package]]
name = "mito2"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"aquamarine",
@@ -7603,7 +7602,6 @@ dependencies = [
"itertools 0.14.0",
"lazy_static",
"log-store",
"memcomparable",
"mito-codec",
"moka",
"object-store",
@@ -8297,7 +8295,7 @@ dependencies = [

[[package]]
name = "object-store"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"anyhow",
"bytes",
@@ -8582,7 +8580,7 @@ dependencies = [

[[package]]
name = "operator"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"api",
@@ -8640,7 +8638,7 @@ dependencies = [
"sql",
"sqlparser 0.55.0-greptime",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tokio",
"tokio-util",
@@ -8952,7 +8950,7 @@ dependencies = [

[[package]]
name = "partition"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -9291,7 +9289,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"

[[package]]
name = "pipeline"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"api",
@@ -9447,7 +9445,7 @@ dependencies = [

[[package]]
name = "plugins"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"auth",
"clap 4.5.40",
@@ -9745,7 +9743,7 @@ dependencies = [

[[package]]
name = "promql"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"async-trait",
@@ -10028,7 +10026,7 @@ dependencies = [

[[package]]
name = "puffin"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-compression 0.4.19",
"async-trait",
@@ -10070,7 +10068,7 @@ dependencies = [

[[package]]
name = "query"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"api",
@@ -10135,7 +10133,7 @@ dependencies = [
"sql",
"sqlparser 0.55.0-greptime",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tokio",
"tokio-stream",
@@ -11499,7 +11497,7 @@ dependencies = [

[[package]]
name = "servers"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"api",
@@ -11622,7 +11620,7 @@ dependencies = [

[[package]]
name = "session"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"api",
@@ -11635,6 +11633,7 @@ dependencies = [
"common-session",
"common-telemetry",
"common-time",
"datafusion-common",
"derive_builder 0.20.2",
"derive_more",
"snafu 0.8.6",
@@ -11949,7 +11948,7 @@ dependencies = [

[[package]]
name = "sql"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arrow-buffer",
@@ -12007,7 +12006,7 @@ dependencies = [

[[package]]
name = "sqlness-runner"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"clap 4.5.40",
@@ -12307,7 +12306,7 @@ dependencies = [

[[package]]
name = "stat"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"nix 0.30.1",
]
@@ -12320,7 +12319,7 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"

[[package]]
name = "store-api"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"aquamarine",
@@ -12335,6 +12334,7 @@ dependencies = [
"common-sql",
"common-time",
"common-wal",
"datafusion-common",
"datafusion-expr",
"datafusion-physical-plan",
"datatypes",
@@ -12466,7 +12466,7 @@ dependencies = [

[[package]]
name = "substrait"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"bytes",
@@ -12634,7 +12634,7 @@ dependencies = [

[[package]]
name = "table"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -12903,7 +12903,7 @@ checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"

[[package]]
name = "tests-fuzz"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"arbitrary",
"async-trait",
@@ -12947,7 +12947,7 @@ dependencies = [

[[package]]
name = "tests-integration"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arrow-flight",
@@ -13019,7 +13019,7 @@ dependencies = [
"sql",
"sqlx",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tempfile",
"time",
@@ -73,7 +73,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.17.0"
version = "0.17.1"
edition = "2021"
license = "Apache-2.0"

Makefile: 2 lines changed
@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
IMAGE_REGISTRY ?= docker.io
IMAGE_NAMESPACE ?= greptime
IMAGE_TAG ?= latest
DEV_BUILDER_IMAGE_TAG ?= 2025-05-19-32619816-20250818043248
DEV_BUILDER_IMAGE_TAG ?= 2025-05-19-f55023f3-20250829091211
BUILDX_MULTI_PLATFORM_BUILD ?= false
BUILDX_BUILDER_NAME ?= gtbuilder
BASE_IMAGE ?= ubuntu
@@ -148,7 +148,7 @@
| `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
| `region_engine.mito.max_concurrent_scan_files` | Integer | `128` | Maximum number of SST files to scan concurrently. |
| `region_engine.mito.max_concurrent_scan_files` | Integer | `384` | Maximum number of SST files to scan concurrently. |
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
@@ -402,8 +402,8 @@
| `event_recorder` | -- | -- | Configuration options for the event recorder. |
| `event_recorder.ttl` | String | `90d` | TTL for the events table that will be used to store the events. Default is `90d`. |
| `stats_persistence` | -- | -- | Configuration options for the stats persistence. |
| `stats_persistence.ttl` | String | `30d` | TTL for the stats table that will be used to store the stats. Default is `30d`.<br/>Set to `0s` to disable stats persistence. |
| `stats_persistence.interval` | String | `60s` | The interval to persist the stats. Default is `60s`.<br/>The minimum value is `60s`, if the value is less than `60s`, it will be overridden to `60s`. |
| `stats_persistence.ttl` | String | `0s` | TTL for the stats table that will be used to store the stats.<br/>Set to `0s` to disable stats persistence.<br/>Default is `0s`.<br/>If you want to enable stats persistence, set the TTL to a value greater than 0.<br/>It is recommended to set a small value, e.g., `3h`. |
| `stats_persistence.interval` | String | `10m` | The interval to persist the stats. Default is `10m`.<br/>The minimum value is `10m`, if the value is less than `10m`, it will be overridden to `10m`. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
@@ -540,7 +540,7 @@
| `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
| `region_engine.mito.max_concurrent_scan_files` | Integer | `128` | Maximum number of SST files to scan concurrently. |
| `region_engine.mito.max_concurrent_scan_files` | Integer | `384` | Maximum number of SST files to scan concurrently. |
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
@@ -488,7 +488,7 @@ sst_write_buffer_size = "8MB"
parallel_scan_channel_size = 32

## Maximum number of SST files to scan concurrently.
max_concurrent_scan_files = 128
max_concurrent_scan_files = 384

## Whether to allow stale WAL entries read during replay.
allow_stale_entries = false

@@ -274,12 +274,15 @@ ttl = "90d"

## Configuration options for the stats persistence.
[stats_persistence]
## TTL for the stats table that will be used to store the stats. Default is `30d`.
## TTL for the stats table that will be used to store the stats.
## Set to `0s` to disable stats persistence.
ttl = "30d"
## The interval to persist the stats. Default is `60s`.
## The minimum value is `60s`, if the value is less than `60s`, it will be overridden to `60s`.
interval = "60s"
## Default is `0s`.
## If you want to enable stats persistence, set the TTL to a value greater than 0.
## It is recommended to set a small value, e.g., `3h`.
ttl = "0s"
## The interval to persist the stats. Default is `10m`.
## The minimum value is `10m`, if the value is less than `10m`, it will be overridden to `10m`.
interval = "10m"

## The logging options.
[logging]

@@ -567,7 +567,7 @@ sst_write_buffer_size = "8MB"
parallel_scan_channel_size = 32

## Maximum number of SST files to scan concurrently.
max_concurrent_scan_files = 128
max_concurrent_scan_files = 384

## Whether to allow stale WAL entries read during replay.
allow_stale_entries = false
File diff suppressed because it is too large
@@ -87,6 +87,13 @@
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read\|write\|list\|Writer::write\|Writer::close\|Reader::read"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode", error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
# Remote WAL
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Triggered region flush total | `meta_triggered_region_flush_total` | `timeseries` | Triggered region flush total | `prometheus` | `none` | `{{pod}}-{{topic_name}}` |
| Triggered region checkpoint total | `meta_triggered_region_checkpoint_total` | `timeseries` | Triggered region checkpoint total | `prometheus` | `none` | `{{pod}}-{{topic_name}}` |
| Topic estimated replay size | `meta_topic_estimated_replay_size` | `timeseries` | Topic estimated max replay size | `prometheus` | `bytes` | `{{pod}}-{{topic_name}}` |
| Kafka logstore's bytes traffic | `rate(greptime_logstore_kafka_client_bytes_total[$__rate_interval])` | `timeseries` | Kafka logstore's bytes traffic | `prometheus` | `bytes` | `{{pod}}-{{logstore}}` |
# Metasrv
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
@@ -103,6 +110,8 @@
| Meta KV Ops Latency | `histogram_quantile(0.99, sum by(pod, le, op, target) (greptime_meta_kv_request_elapsed_bucket))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `{{pod}}-{{op}} p99` |
| Rate of meta KV Ops | `rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `{{pod}}-{{op}} p99` |
| DDL Latency | `histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `CreateLogicalTables-{{step}} p90` |
| Reconciliation stats | `greptime_meta_reconciliation_stats` | `timeseries` | Reconciliation stats | `prometheus` | `s` | `{{pod}}-{{table_type}}-{{type}}` |
| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
# Flownode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
@@ -802,6 +802,48 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
- title: Remote WAL
panels:
- title: Triggered region flush total
type: timeseries
description: Triggered region flush total
unit: none
queries:
- expr: meta_triggered_region_flush_total
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{topic_name}}'
- title: Triggered region checkpoint total
type: timeseries
description: Triggered region checkpoint total
unit: none
queries:
- expr: meta_triggered_region_checkpoint_total
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{topic_name}}'
- title: Topic estimated replay size
type: timeseries
description: Topic estimated max replay size
unit: bytes
queries:
- expr: meta_topic_estimated_replay_size
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{topic_name}}'
- title: Kafka logstore's bytes traffic
type: timeseries
description: Kafka logstore's bytes traffic
unit: bytes
queries:
- expr: rate(greptime_logstore_kafka_client_bytes_total[$__rate_interval])
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{logstore}}'
- title: Metasrv
panels:
- title: Region migration datanode
@@ -948,6 +990,26 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: AlterTable-{{step}} p90
- title: Reconciliation stats
type: timeseries
description: Reconciliation stats
unit: s
queries:
- expr: greptime_meta_reconciliation_stats
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{table_type}}-{{type}}'
- title: Reconciliation steps
type: timeseries
description: 'Elapsed of Reconciliation steps '
unit: s
queries:
- expr: histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{procedure_name}}-{{step}}-P90'
- title: Flownode
panels:
- title: Flow Ingest / Output Rate
File diff suppressed because it is too large
@@ -87,6 +87,13 @@
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read\|write\|list\|Writer::write\|Writer::close\|Reader::read"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{ error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
# Remote WAL
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Triggered region flush total | `meta_triggered_region_flush_total` | `timeseries` | Triggered region flush total | `prometheus` | `none` | `{{pod}}-{{topic_name}}` |
| Triggered region checkpoint total | `meta_triggered_region_checkpoint_total` | `timeseries` | Triggered region checkpoint total | `prometheus` | `none` | `{{pod}}-{{topic_name}}` |
| Topic estimated replay size | `meta_topic_estimated_replay_size` | `timeseries` | Topic estimated max replay size | `prometheus` | `bytes` | `{{pod}}-{{topic_name}}` |
| Kafka logstore's bytes traffic | `rate(greptime_logstore_kafka_client_bytes_total[$__rate_interval])` | `timeseries` | Kafka logstore's bytes traffic | `prometheus` | `bytes` | `{{pod}}-{{logstore}}` |
# Metasrv
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
@@ -103,6 +110,8 @@
| Meta KV Ops Latency | `histogram_quantile(0.99, sum by(pod, le, op, target) (greptime_meta_kv_request_elapsed_bucket))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `{{pod}}-{{op}} p99` |
| Rate of meta KV Ops | `rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `{{pod}}-{{op}} p99` |
| DDL Latency | `histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `CreateLogicalTables-{{step}} p90` |
| Reconciliation stats | `greptime_meta_reconciliation_stats` | `timeseries` | Reconciliation stats | `prometheus` | `s` | `{{pod}}-{{table_type}}-{{type}}` |
| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
# Flownode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
@@ -802,6 +802,48 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
- title: Remote WAL
panels:
- title: Triggered region flush total
type: timeseries
description: Triggered region flush total
unit: none
queries:
- expr: meta_triggered_region_flush_total
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{topic_name}}'
- title: Triggered region checkpoint total
type: timeseries
description: Triggered region checkpoint total
unit: none
queries:
- expr: meta_triggered_region_checkpoint_total
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{topic_name}}'
- title: Topic estimated replay size
type: timeseries
description: Topic estimated max replay size
unit: bytes
queries:
- expr: meta_topic_estimated_replay_size
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{topic_name}}'
- title: Kafka logstore's bytes traffic
type: timeseries
description: Kafka logstore's bytes traffic
unit: bytes
queries:
- expr: rate(greptime_logstore_kafka_client_bytes_total[$__rate_interval])
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{logstore}}'
- title: Metasrv
panels:
- title: Region migration datanode
@@ -948,6 +990,26 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: AlterTable-{{step}} p90
- title: Reconciliation stats
type: timeseries
description: Reconciliation stats
unit: s
queries:
- expr: greptime_meta_reconciliation_stats
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{table_type}}-{{type}}'
- title: Reconciliation steps
type: timeseries
description: 'Elapsed of Reconciliation steps '
unit: s
queries:
- expr: histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{procedure_name}}-{{step}}-P90'
- title: Flownode
panels:
- title: Flow Ingest / Output Rate
@@ -30,8 +30,7 @@ use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{
StringVectorBuilder, TimestampMicrosecondVectorBuilder, UInt32VectorBuilder,
UInt64VectorBuilder,
StringVectorBuilder, TimestampSecondVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder,
};
use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt};
@@ -107,17 +106,17 @@ impl InformationSchemaTables {
ColumnSchema::new(AUTO_INCREMENT, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(
CREATE_TIME,
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_second_datatype(),
true,
),
ColumnSchema::new(
UPDATE_TIME,
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_second_datatype(),
true,
),
ColumnSchema::new(
CHECK_TIME,
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_second_datatype(),
true,
),
ColumnSchema::new(TABLE_COLLATION, ConcreteDataType::string_datatype(), true),
@@ -194,9 +193,9 @@ struct InformationSchemaTablesBuilder {
max_index_length: UInt64VectorBuilder,
data_free: UInt64VectorBuilder,
auto_increment: UInt64VectorBuilder,
create_time: TimestampMicrosecondVectorBuilder,
update_time: TimestampMicrosecondVectorBuilder,
check_time: TimestampMicrosecondVectorBuilder,
create_time: TimestampSecondVectorBuilder,
update_time: TimestampSecondVectorBuilder,
check_time: TimestampSecondVectorBuilder,
table_collation: StringVectorBuilder,
checksum: UInt64VectorBuilder,
create_options: StringVectorBuilder,
@@ -231,9 +230,9 @@ impl InformationSchemaTablesBuilder {
max_index_length: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
data_free: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
auto_increment: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
create_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
update_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
check_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
create_time: TimestampSecondVectorBuilder::with_capacity(INIT_CAPACITY),
update_time: TimestampSecondVectorBuilder::with_capacity(INIT_CAPACITY),
check_time: TimestampSecondVectorBuilder::with_capacity(INIT_CAPACITY),
table_collation: StringVectorBuilder::with_capacity(INIT_CAPACITY),
checksum: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
create_options: StringVectorBuilder::with_capacity(INIT_CAPACITY),
@@ -380,7 +379,7 @@ impl InformationSchemaTablesBuilder {
self.create_options
.push(Some(table_info.meta.options.to_string().as_ref()));
self.create_time
.push(Some(table_info.meta.created_on.timestamp_millis().into()));
.push(Some(table_info.meta.created_on.timestamp().into()));

self.temporary
.push(if matches!(table_type, TableType::Temporary) {
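The hunks above narrow the `CREATE_TIME`/`UPDATE_TIME`/`CHECK_TIME` columns of `information_schema.tables` to second precision and push `created_on.timestamp()` instead of `timestamp_millis()`, so the stored integer matches the column's unit. A minimal illustration of that unit mismatch, assuming only the `chrono` crate (this is not GreptimeDB code):

```rust
// Why the builder's time unit and the pushed value must agree.
use chrono::{TimeZone, Utc};

fn main() {
    let created_on = Utc.with_ymd_and_hms(2025, 8, 29, 9, 12, 11).unwrap();
    // Seconds since the epoch: matches a TimestampSecond column.
    let secs = created_on.timestamp();
    // Milliseconds since the epoch: ~1000x larger, wrong if stored unconverted
    // into a second- (or microsecond-) typed column.
    let millis = created_on.timestamp_millis();
    assert_eq!(millis, secs * 1000);
    println!("secs = {secs}, millis = {millis}");
}
```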
@@ -83,6 +83,20 @@ pub(crate) struct StoreConfig {
}

impl StoreConfig {
pub fn tls_config(&self) -> Option<TlsOption> {
if self.backend_tls_mode != TlsMode::Disable {
Some(TlsOption {
mode: self.backend_tls_mode.clone(),
cert_path: self.backend_tls_cert_path.clone(),
key_path: self.backend_tls_key_path.clone(),
ca_cert_path: self.backend_tls_ca_cert_path.clone(),
watch: self.backend_tls_watch,
})
} else {
None
}
}

/// Builds a [`KvBackendRef`] from the store configuration.
pub async fn build(&self) -> Result<KvBackendRef, BoxedError> {
let max_txn_ops = self.max_txn_ops;
@@ -92,17 +106,7 @@ impl StoreConfig {
} else {
let kvbackend = match self.backend {
BackendImpl::EtcdStore => {
let tls_config = if self.backend_tls_mode != TlsMode::Disable {
Some(TlsOption {
mode: self.backend_tls_mode.clone(),
cert_path: self.backend_tls_cert_path.clone(),
key_path: self.backend_tls_key_path.clone(),
ca_cert_path: self.backend_tls_ca_cert_path.clone(),
watch: self.backend_tls_watch,
})
} else {
None
};
let tls_config = self.tls_config();
let etcd_client = create_etcd_client_with_tls(store_addrs, tls_config.as_ref())
.await
.map_err(BoxedError::new)?;
@@ -111,7 +115,8 @@ impl StoreConfig {
#[cfg(feature = "pg_kvbackend")]
BackendImpl::PostgresStore => {
let table_name = &self.meta_table_name;
let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs, None)
let tls_config = self.tls_config();
let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs, tls_config)
.await
.map_err(BoxedError::new)?;
let schema_name = self.meta_schema_name.as_deref();
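The `StoreConfig` hunks above pull the duplicated TLS block into a shared `tls_config()` helper and hand its result to both the etcd and the PostgreSQL backend. A simplified sketch of the pattern with stand-in types (not the real `TlsOption`/`TlsMode` definitions):

```rust
// Shared helper: build the optional TLS settings once so every backend
// uses identical logic instead of repeating the if/else block inline.
#[derive(Clone, Debug, PartialEq)]
enum TlsMode { Disable, Require }

#[derive(Clone, Debug)]
struct TlsOption { mode: TlsMode, cert_path: String, key_path: String }

struct StoreConfig { backend_tls_mode: TlsMode, cert_path: String, key_path: String }

impl StoreConfig {
    /// Returns `Some(TlsOption)` only when TLS is not disabled.
    fn tls_config(&self) -> Option<TlsOption> {
        (self.backend_tls_mode != TlsMode::Disable).then(|| TlsOption {
            mode: self.backend_tls_mode.clone(),
            cert_path: self.cert_path.clone(),
            key_path: self.key_path.clone(),
        })
    }
}

fn main() {
    let cfg = StoreConfig {
        backend_tls_mode: TlsMode::Require,
        cert_path: "client.crt".into(),
        key_path: "client.key".into(),
    };
    // Both the etcd and the PostgreSQL branch can now just call the helper.
    println!("{:?}", cfg.tls_config());
}
```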
@@ -196,7 +196,10 @@ pub async fn stream_to_parquet(
concurrency: usize,
) -> Result<usize> {
let write_props = column_wise_config(
WriterProperties::builder().set_compression(Compression::ZSTD(ZstdLevel::default())),
WriterProperties::builder()
.set_compression(Compression::ZSTD(ZstdLevel::default()))
.set_statistics_truncate_length(None)
.set_column_index_truncate_length(None),
schema,
)
.build();
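The `stream_to_parquet` hunk adds two writer options so Parquet statistics and column-index values are no longer truncated. A hedged sketch of the builder calls, assuming the `parquet` crate version used by the repository (where both setters accept an `Option`):

```rust
// Build writer properties with ZSTD compression and truncation disabled.
use parquet::basic::{Compression, ZstdLevel};
use parquet::file::properties::WriterProperties;

fn main() {
    let props = WriterProperties::builder()
        .set_compression(Compression::ZSTD(ZstdLevel::default()))
        // `None` means "do not truncate" statistics / column-index values,
        // keeping full min/max strings in the file metadata.
        .set_statistics_truncate_length(None)
        .set_column_index_truncate_length(None)
        .build();
    println!("created_by = {}", props.created_by());
}
```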
@@ -251,7 +251,6 @@ macro_rules! define_from_tonic_status {
.get(key)
.and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
}

let code = metadata_value(&e, $crate::GREPTIME_DB_HEADER_ERROR_CODE)
.and_then(|s| {
if let Ok(code) = s.parse::<u32>() {
@@ -34,6 +34,33 @@ pub struct ClampFunction;

const CLAMP_NAME: &str = "clamp";

/// Ensure the vector is constant and not empty (i.e., all values are identical)
fn ensure_constant_vector(vector: &VectorRef) -> Result<()> {
ensure!(
!vector.is_empty(),
InvalidFuncArgsSnafu {
err_msg: "Expect at least one value",
}
);

if vector.is_const() {
return Ok(());
}

let first = vector.get_ref(0);
for i in 1..vector.len() {
let v = vector.get_ref(i);
if first != v {
return InvalidFuncArgsSnafu {
err_msg: "All values in min/max argument must be identical",
}
.fail();
}
}

Ok(())
}

impl Function for ClampFunction {
fn name(&self) -> &str {
CLAMP_NAME
@@ -80,16 +107,9 @@ impl Function for ClampFunction {
),
}
);
ensure!(
(columns[1].len() == 1 || columns[1].is_const())
&& (columns[2].len() == 1 || columns[2].is_const()),
InvalidFuncArgsSnafu {
err_msg: format!(
"The second and third args should be scalar, have: {:?}, {:?}",
columns[1], columns[2]
),
}
);

ensure_constant_vector(&columns[1])?;
ensure_constant_vector(&columns[2])?;

with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
let input_array = columns[0].to_arrow_array();
@@ -204,15 +224,8 @@ impl Function for ClampMinFunction {
),
}
);
ensure!(
columns[1].len() == 1 || columns[1].is_const(),
InvalidFuncArgsSnafu {
err_msg: format!(
"The second arg (min) should be scalar, have: {:?}",
columns[1]
),
}
);

ensure_constant_vector(&columns[1])?;

with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
let input_array = columns[0].to_arrow_array();
@@ -292,15 +305,8 @@ impl Function for ClampMaxFunction {
),
}
);
ensure!(
columns[1].len() == 1 || columns[1].is_const(),
InvalidFuncArgsSnafu {
err_msg: format!(
"The second arg (max) should be scalar, have: {:?}",
columns[1]
),
}
);

ensure_constant_vector(&columns[1])?;

with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
let input_array = columns[0].to_arrow_array();
@@ -537,8 +543,8 @@ mod test {
let func = ClampFunction;
let args = [
Arc::new(Float64Vector::from(input)) as _,
Arc::new(Float64Vector::from_vec(vec![min, min])) as _,
Arc::new(Float64Vector::from_vec(vec![max])) as _,
Arc::new(Float64Vector::from_vec(vec![min, max])) as _,
Arc::new(Float64Vector::from_vec(vec![max, min])) as _,
];
let result = func.eval(&FunctionContext::default(), args.as_slice());
assert!(result.is_err());
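The clamp hunks replace the "argument must be length 1 or const" checks with `ensure_constant_vector`, which also accepts a full-length column as long as every value in it is identical; the updated test feeds `[min, max]` and `[max, min]` columns and expects an error. A simplified, self-contained sketch of that check over a plain slice (not the real `VectorRef` API):

```rust
// All values in the min/max argument must be identical and non-empty.
fn ensure_constant<T: PartialEq>(values: &[T]) -> Result<(), String> {
    let first = values
        .first()
        .ok_or_else(|| "Expect at least one value".to_string())?;
    if values.iter().any(|v| v != first) {
        return Err("All values in min/max argument must be identical".to_string());
    }
    Ok(())
}

fn main() {
    // A constant column of any length is accepted.
    assert!(ensure_constant(&[5.0, 5.0, 5.0]).is_ok());
    // Mirrors the updated test: a min/max column with differing values is rejected.
    assert!(ensure_constant(&[1.0, 2.0]).is_err());
    println!("constant-argument checks behave as expected");
}
```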
@@ -16,15 +16,12 @@ use std::any::Any;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;

use common_query::error::FromScalarValueSnafu;
use common_query::prelude::ColumnarValue;
use datafusion::logical_expr::{ScalarFunctionArgs, ScalarUDFImpl};
use datafusion_expr::ScalarUDF;
use datatypes::data_type::DataType;
use datatypes::prelude::*;
use datatypes::vectors::Helper;
use session::context::QueryContextRef;
use snafu::ResultExt;

use crate::function::{FunctionContext, FunctionRef};
use crate::state::FunctionState;
@@ -76,13 +73,7 @@ impl ScalarUDFImpl for ScalarUdf {
let columns = args
.args
.iter()
.map(|x| {
ColumnarValue::try_from(x).and_then(|y| match y {
ColumnarValue::Vector(z) => Ok(z),
ColumnarValue::Scalar(z) => Helper::try_from_scalar_value(z, args.number_rows)
.context(FromScalarValueSnafu),
})
})
.map(|x| ColumnarValue::try_from(x).and_then(|y| y.try_into_vector(args.number_rows)))
.collect::<common_query::error::Result<Vec<_>>>()?;
let v = self
.function
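The `ScalarUdf::invoke` hunk collapses the manual scalar-vs-vector match into a single `try_into_vector(args.number_rows)` call. A simplified stand-in illustrating the same idea (the real `ColumnarValue` lives in `common_query::prelude` and works on typed vectors, not `Vec<f64>`):

```rust
// Fold the scalar-vs-vector handling into one helper on the enum itself.
#[derive(Debug, Clone)]
enum ColumnarValue {
    Vector(Vec<f64>),
    Scalar(f64),
}

impl ColumnarValue {
    /// Expands a scalar to `rows` copies; passes a vector through unchanged.
    fn try_into_vector(self, rows: usize) -> Result<Vec<f64>, String> {
        match self {
            ColumnarValue::Vector(v) => Ok(v),
            ColumnarValue::Scalar(s) => Ok(vec![s; rows]),
        }
    }
}

fn main() -> Result<(), String> {
    let args = vec![
        ColumnarValue::Scalar(1.5),
        ColumnarValue::Vector(vec![1.0, 2.0, 3.0]),
    ];
    // Every argument becomes a column of the same length with one map call.
    let columns: Vec<Vec<f64>> = args
        .into_iter()
        .map(|a| a.try_into_vector(3))
        .collect::<Result<_, _>>()?;
    println!("{columns:?}");
    Ok(())
}
```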
@@ -535,9 +535,6 @@ pub enum Error {
source: common_wal::error::Error,
},

#[snafu(display("Failed to resolve Kafka broker endpoint."))]
ResolveKafkaEndpoint { source: common_wal::error::Error },

#[snafu(display("Failed to build a Kafka controller client"))]
BuildKafkaCtrlClient {
#[snafu(implicit)]
@@ -1108,7 +1105,6 @@ impl ErrorExt for Error {
| BuildKafkaClient { .. }
| BuildKafkaCtrlClient { .. }
| KafkaPartitionClient { .. }
| ResolveKafkaEndpoint { .. }
| ProduceRecord { .. }
| CreateKafkaWalTopic { .. }
| EmptyTopicPool { .. }
@@ -108,10 +108,6 @@ pub struct OpenRegion {
pub region_wal_options: HashMap<RegionNumber, String>,
#[serde(default)]
pub skip_wal_replay: bool,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub replay_entry_id: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub metadata_replay_entry_id: Option<u64>,
}

impl OpenRegion {
@@ -128,22 +124,8 @@ impl OpenRegion {
region_options,
region_wal_options,
skip_wal_replay,
replay_entry_id: None,
metadata_replay_entry_id: None,
}
}

/// Sets the replay entry id.
pub fn with_replay_entry_id(mut self, replay_entry_id: Option<u64>) -> Self {
self.replay_entry_id = replay_entry_id;
self
}

/// Sets the metadata replay entry id.
pub fn with_metadata_replay_entry_id(mut self, metadata_replay_entry_id: Option<u64>) -> Self {
self.metadata_replay_entry_id = metadata_replay_entry_id;
self
}
}

/// The instruction of downgrading leader region.
@@ -169,7 +151,7 @@ impl Display for DowngradeRegion {
}

/// Upgrades a follower region to leader region.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct UpgradeRegion {
/// The [RegionId].
pub region_id: RegionId,
@@ -186,6 +168,24 @@ pub struct UpgradeRegion {
/// The hint for replaying memtable.
#[serde(default)]
pub location_id: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub replay_entry_id: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub metadata_replay_entry_id: Option<u64>,
}

impl UpgradeRegion {
/// Sets the replay entry id.
pub fn with_replay_entry_id(mut self, replay_entry_id: Option<u64>) -> Self {
self.replay_entry_id = replay_entry_id;
self
}

/// Sets the metadata replay entry id.
pub fn with_metadata_replay_entry_id(mut self, metadata_replay_entry_id: Option<u64>) -> Self {
self.metadata_replay_entry_id = metadata_replay_entry_id;
self
}
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
@@ -370,8 +370,6 @@ mod tests {
region_options,
region_wal_options: HashMap::new(),
skip_wal_replay: false,
replay_entry_id: None,
metadata_replay_entry_id: None,
};
assert_eq!(expected, deserialized);
}

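
// The new replay fields above are Option<u64> guarded by
// `#[serde(default, skip_serializing_if = "Option::is_none")]`, plus `with_*`
// builder methods. A minimal sketch of why that stays wire-compatible with old
// payloads, assuming serde and serde_json as dependencies (the struct here is a
// simplified stand-in, not the real OpenRegion).
use serde::{Deserialize, Serialize};

#[derive(Debug, Default, PartialEq, Serialize, Deserialize)]
struct OpenRegionSketch {
    region_number: u32,
    #[serde(default)]
    skip_wal_replay: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    replay_entry_id: Option<u64>,
}

impl OpenRegionSketch {
    fn with_replay_entry_id(mut self, replay_entry_id: Option<u64>) -> Self {
        self.replay_entry_id = replay_entry_id;
        self
    }
}

fn main() {
    // Old senders omit the field entirely; `default` fills in None on decode.
    let old: OpenRegionSketch =
        serde_json::from_str(r#"{"region_number":1,"skip_wal_replay":true}"#).unwrap();
    assert_eq!(old.replay_entry_id, None);
    // New senders that leave it None also omit it on encode, so old receivers still parse it.
    let new = OpenRegionSketch { region_number: 1, skip_wal_replay: true, ..Default::default() }
        .with_replay_entry_id(None);
    assert!(!serde_json::to_string(&new).unwrap().contains("replay_entry_id"));
}
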
@@ -46,7 +46,7 @@ pub struct TopicRegionValue {
pub checkpoint: Option<ReplayCheckpoint>,
}

#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct ReplayCheckpoint {
#[serde(default)]
pub entry_id: u64,

@@ -100,7 +100,7 @@ impl EtcdStore {
.with_label_values(&["etcd", "txn"])
.start_timer();
let txn = Txn::new().and_then(part);
self.client.kv_client().txn(txn).await
self.kv_client().txn(txn).await
})
.collect::<Vec<_>>();


@@ -24,6 +24,7 @@ use datatypes::schema::ColumnSchema;
|
||||
use futures::future::{join_all, try_join_all};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::metadata::{ColumnMetadata, RegionMetadata};
|
||||
use store_api::storage::consts::ReservedColumnId;
|
||||
use store_api::storage::{RegionId, TableId};
|
||||
use table::metadata::{RawTableInfo, RawTableMeta};
|
||||
use table::table_name::TableName;
|
||||
@@ -384,6 +385,7 @@ pub(crate) fn build_table_meta_from_column_metadatas(
|
||||
|
||||
*next_column_id = column_ids
|
||||
.iter()
|
||||
.filter(|id| !ReservedColumnId::is_reserved(**id))
|
||||
.max()
|
||||
.map(|max| max + 1)
|
||||
.unwrap_or(*next_column_id)
|
||||
@@ -1039,9 +1041,13 @@ mod tests {
|
||||
fn test_build_table_info_from_column_metadatas() {
|
||||
let mut column_metadatas = new_test_column_metadatas();
|
||||
column_metadatas.push(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("col3", ConcreteDataType::string_datatype(), true),
|
||||
column_schema: ColumnSchema::new(
|
||||
"__table_id",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
semantic_type: SemanticType::Tag,
|
||||
column_id: 3,
|
||||
column_id: ReservedColumnId::table_id(),
|
||||
});
|
||||
|
||||
let table_id = 1;
|
||||
@@ -1066,8 +1072,11 @@ mod tests {
|
||||
assert_eq!(new_table_meta.partition_key_indices, vec![2]);
|
||||
assert_eq!(new_table_meta.value_indices, vec![1, 2]);
|
||||
assert_eq!(new_table_meta.schema.timestamp_index, Some(1));
|
||||
assert_eq!(new_table_meta.column_ids, vec![0, 1, 2, 3]);
|
||||
assert_eq!(new_table_meta.next_column_id, 4);
|
||||
assert_eq!(
|
||||
new_table_meta.column_ids,
|
||||
vec![0, 1, 2, ReservedColumnId::table_id()]
|
||||
);
|
||||
assert_eq!(new_table_meta.next_column_id, table_meta.next_column_id);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -25,8 +25,7 @@ use snafu::ResultExt;

use crate::error::{
BuildKafkaClientSnafu, BuildKafkaCtrlClientSnafu, CreateKafkaWalTopicSnafu,
KafkaGetOffsetSnafu, KafkaPartitionClientSnafu, ProduceRecordSnafu, ResolveKafkaEndpointSnafu,
Result, TlsConfigSnafu,
KafkaGetOffsetSnafu, KafkaPartitionClientSnafu, ProduceRecordSnafu, Result, TlsConfigSnafu,
};

// Each topic only has one partition for now.
@@ -209,10 +208,8 @@ impl KafkaTopicCreator {
/// Builds a kafka [Client](rskafka::client::Client).
pub async fn build_kafka_client(connection: &KafkaConnectionConfig) -> Result<Client> {
// Builds an kafka controller client for creating topics.
let broker_endpoints = common_wal::resolve_to_ipv4(&connection.broker_endpoints)
.await
.context(ResolveKafkaEndpointSnafu)?;
let mut builder = ClientBuilder::new(broker_endpoints).backoff_config(DEFAULT_BACKOFF_CONFIG);
let mut builder = ClientBuilder::new(connection.broker_endpoints.clone())
.backoff_config(DEFAULT_BACKOFF_CONFIG);
if let Some(sasl) = &connection.sasl {
builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
};

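
// The hunk above stops pre-resolving broker hostnames to IPv4 and passes the
// configured endpoints straight to the Kafka client builder, which then handles
// DNS itself. For reference, a std-only sketch of what an explicit
// resolve-to-IPv4 step looks like (the real code used a common_wal helper, not
// this function).
use std::io;
use std::net::{SocketAddr, ToSocketAddrs};

fn resolve_to_ipv4(endpoints: &[String]) -> io::Result<Vec<String>> {
    let mut resolved = Vec::with_capacity(endpoints.len());
    for endpoint in endpoints {
        // Keep only the first IPv4 address each "host:port" resolves to.
        let addr = endpoint
            .to_socket_addrs()?
            .find(SocketAddr::is_ipv4)
            .ok_or_else(|| {
                io::Error::new(io::ErrorKind::NotFound, format!("no IPv4 address for {endpoint}"))
            })?;
        resolved.push(addr.to_string());
    }
    Ok(resolved)
}

fn main() -> io::Result<()> {
    println!("{:?}", resolve_to_ipv4(&["localhost:9092".to_string()])?);
    Ok(())
}
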
@@ -238,10 +238,7 @@ mod tests {
// Upgrade region
let instruction = Instruction::UpgradeRegion(UpgradeRegion {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
replay_timeout: None,
location_id: None,
..Default::default()
});
assert!(
heartbeat_handler.is_acceptable(&heartbeat_env.create_handler_ctx((meta, instruction)))

@@ -16,7 +16,7 @@ use common_meta::instruction::{InstructionReply, OpenRegion, SimpleReply};
use common_meta::wal_options_allocator::prepare_wal_options;
use futures_util::future::BoxFuture;
use store_api::path_utils::table_dir;
use store_api::region_request::{PathType, RegionOpenRequest, RegionRequest, ReplayCheckpoint};
use store_api::region_request::{PathType, RegionOpenRequest, RegionRequest};

use crate::heartbeat::handler::HandlerContext;

@@ -29,31 +29,18 @@ impl HandlerContext {
mut region_options,
region_wal_options,
skip_wal_replay,
replay_entry_id,
metadata_replay_entry_id,
}: OpenRegion,
) -> BoxFuture<'static, Option<InstructionReply>> {
Box::pin(async move {
let region_id = Self::region_ident_to_region_id(&region_ident);
prepare_wal_options(&mut region_options, region_id, &region_wal_options);
let checkpoint = match (replay_entry_id, metadata_replay_entry_id) {
(Some(replay_entry_id), Some(metadata_replay_entry_id)) => Some(ReplayCheckpoint {
entry_id: replay_entry_id,
metadata_entry_id: Some(metadata_replay_entry_id),
}),
(Some(replay_entry_id), None) => Some(ReplayCheckpoint {
entry_id: replay_entry_id,
metadata_entry_id: None,
}),
_ => None,
};
let request = RegionRequest::Open(RegionOpenRequest {
engine: region_ident.engine,
table_dir: table_dir(&region_storage_path, region_id.table_id()),
path_type: PathType::Bare,
options: region_options,
skip_wal_replay,
checkpoint,
checkpoint: None,
});
let result = self.region_server.handle_request(region_id, request).await;
let success = result.is_ok();

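
// The removed lines above folded `(replay_entry_id, metadata_replay_entry_id)`
// into an Option<ReplayCheckpoint>; the open path now passes `checkpoint: None`
// and the catchup/upgrade path builds it instead. A standalone sketch of that
// folding, with a local stand-in struct rather than store_api's ReplayCheckpoint.
#[derive(Debug, PartialEq)]
struct CheckpointSketch {
    entry_id: u64,
    metadata_entry_id: Option<u64>,
}

fn to_checkpoint(
    replay_entry_id: Option<u64>,
    metadata_replay_entry_id: Option<u64>,
) -> Option<CheckpointSketch> {
    match (replay_entry_id, metadata_replay_entry_id) {
        // A data-WAL entry id is required; the metadata entry id is optional.
        (Some(entry_id), metadata_entry_id) => Some(CheckpointSketch { entry_id, metadata_entry_id }),
        _ => None,
    }
}

fn main() {
    assert_eq!(to_checkpoint(None, Some(7)), None);
    assert_eq!(
        to_checkpoint(Some(42), None),
        Some(CheckpointSketch { entry_id: 42, metadata_entry_id: None })
    );
}
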
@@ -15,7 +15,7 @@
|
||||
use common_meta::instruction::{InstructionReply, UpgradeRegion, UpgradeRegionReply};
|
||||
use common_telemetry::{info, warn};
|
||||
use futures_util::future::BoxFuture;
|
||||
use store_api::region_request::{RegionCatchupRequest, RegionRequest};
|
||||
use store_api::region_request::{RegionCatchupRequest, RegionRequest, ReplayCheckpoint};
|
||||
|
||||
use crate::heartbeat::handler::HandlerContext;
|
||||
use crate::heartbeat::task_tracker::WaitResult;
|
||||
@@ -29,6 +29,8 @@ impl HandlerContext {
|
||||
metadata_last_entry_id,
|
||||
replay_timeout,
|
||||
location_id,
|
||||
replay_entry_id,
|
||||
metadata_replay_entry_id,
|
||||
}: UpgradeRegion,
|
||||
) -> BoxFuture<'static, Option<InstructionReply>> {
|
||||
Box::pin(async move {
|
||||
@@ -50,6 +52,14 @@ impl HandlerContext {
|
||||
|
||||
let region_server_moved = self.region_server.clone();
|
||||
|
||||
let checkpoint = match (replay_entry_id, metadata_replay_entry_id) {
|
||||
(Some(entry_id), metadata_entry_id) => Some(ReplayCheckpoint {
|
||||
entry_id,
|
||||
metadata_entry_id,
|
||||
}),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
// The catchup task is almost zero cost if the inside region is writable.
|
||||
// Therefore, it always registers a new catchup task.
|
||||
let register_result = self
|
||||
@@ -66,6 +76,7 @@ impl HandlerContext {
|
||||
entry_id: last_entry_id,
|
||||
metadata_entry_id: metadata_last_entry_id,
|
||||
location_id,
|
||||
checkpoint,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
@@ -148,10 +159,8 @@ mod tests {
|
||||
.clone()
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id: None,
|
||||
metadata_last_entry_id: None,
|
||||
replay_timeout,
|
||||
location_id: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
|
||||
@@ -187,10 +196,8 @@ mod tests {
|
||||
.clone()
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id: None,
|
||||
metadata_last_entry_id: None,
|
||||
replay_timeout,
|
||||
location_id: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
|
||||
@@ -227,10 +234,8 @@ mod tests {
|
||||
.clone()
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id: None,
|
||||
metadata_last_entry_id: None,
|
||||
replay_timeout,
|
||||
location_id: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
|
||||
@@ -271,9 +276,7 @@ mod tests {
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
replay_timeout,
|
||||
last_entry_id: None,
|
||||
metadata_last_entry_id: None,
|
||||
location_id: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
|
||||
@@ -289,10 +292,8 @@ mod tests {
|
||||
let reply = handler_context
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id: None,
|
||||
metadata_last_entry_id: None,
|
||||
replay_timeout: Some(Duration::from_millis(500)),
|
||||
location_id: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
|
||||
@@ -332,10 +333,7 @@ mod tests {
|
||||
.clone()
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id: None,
|
||||
metadata_last_entry_id: None,
|
||||
replay_timeout: None,
|
||||
location_id: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
|
||||
@@ -351,10 +349,8 @@ mod tests {
|
||||
.clone()
|
||||
.handle_upgrade_region_instruction(UpgradeRegion {
|
||||
region_id,
|
||||
last_entry_id: None,
|
||||
metadata_last_entry_id: None,
|
||||
replay_timeout: Some(Duration::from_millis(200)),
|
||||
location_id: None,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
|
||||
|
||||
@@ -128,6 +128,10 @@ impl Helper {
ScalarValue::Boolean(v) => {
ConstantVector::new(Arc::new(BooleanVector::from(vec![v])), length)
}
ScalarValue::Float16(v) => ConstantVector::new(
Arc::new(Float32Vector::from(vec![v.map(f32::from)])),
length,
),
ScalarValue::Float32(v) => {
ConstantVector::new(Arc::new(Float32Vector::from(vec![v])), length)
}
@@ -243,7 +247,6 @@ impl Helper {
| ScalarValue::LargeList(_)
| ScalarValue::Dictionary(_, _)
| ScalarValue::Union(_, _, _)
| ScalarValue::Float16(_)
| ScalarValue::Utf8View(_)
| ScalarValue::BinaryView(_)
| ScalarValue::Map(_)

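
// The new Float16 arm above widens half-precision scalars to f32 before building
// a Float32 constant vector, since there is no dedicated f16 vector type.
// A minimal sketch of that widening, assuming the `half` crate as a dependency
// (arrow's Float16 scalar wraps half::f16).
use half::f16;

fn widen(v: Option<f16>) -> Option<f32> {
    // f32 implements From<f16>, so the lossless conversion is just a map.
    v.map(f32::from)
}

fn main() {
    assert_eq!(widen(Some(f16::from_f32(1.5))), Some(1.5f32));
    assert_eq!(widen(None), None);
}
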
@@ -29,10 +29,15 @@ use common_runtime::JoinHandle;
|
||||
use common_telemetry::tracing::warn;
|
||||
use common_telemetry::{debug, info};
|
||||
use common_time::TimeToLive;
|
||||
use datafusion_common::tree_node::{TreeNodeRecursion, TreeNodeVisitor};
|
||||
use datafusion_expr::LogicalPlan;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use query::QueryEngineRef;
|
||||
use session::context::QueryContext;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use sql::parsers::utils::is_tql;
|
||||
use store_api::storage::{RegionId, TableId};
|
||||
use table::table_reference::TableReference;
|
||||
use tokio::sync::{oneshot, RwLock};
|
||||
|
||||
use crate::batching_mode::frontend_client::FrontendClient;
|
||||
@@ -42,8 +47,8 @@ use crate::batching_mode::utils::sql_to_df_plan;
|
||||
use crate::batching_mode::BatchingModeOptions;
|
||||
use crate::engine::FlowEngine;
|
||||
use crate::error::{
|
||||
CreateFlowSnafu, ExternalSnafu, FlowAlreadyExistSnafu, FlowNotFoundSnafu, InvalidQuerySnafu,
|
||||
TableNotFoundMetaSnafu, UnexpectedSnafu, UnsupportedSnafu,
|
||||
CreateFlowSnafu, DatafusionSnafu, ExternalSnafu, FlowAlreadyExistSnafu, FlowNotFoundSnafu,
|
||||
InvalidQuerySnafu, TableNotFoundMetaSnafu, UnexpectedSnafu, UnsupportedSnafu,
|
||||
};
|
||||
use crate::metrics::METRIC_FLOW_BATCHING_ENGINE_BULK_MARK_TIME_WINDOW;
|
||||
use crate::{CreateFlowArgs, Error, FlowId, TableName};
|
||||
@@ -151,9 +156,11 @@ impl BatchingEngine {
|
||||
let handle: JoinHandle<Result<(), Error>> = tokio::spawn(async move {
|
||||
let src_table_names = &task.config.source_table_names;
|
||||
let mut all_dirty_windows = HashSet::new();
|
||||
let mut is_dirty = false;
|
||||
for src_table_name in src_table_names {
|
||||
if let Some((timestamps, unit)) = group_by_table_name.get(src_table_name) {
|
||||
let Some(expr) = &task.config.time_window_expr else {
|
||||
is_dirty = true;
|
||||
continue;
|
||||
};
|
||||
for timestamp in timestamps {
|
||||
@@ -168,6 +175,9 @@ impl BatchingEngine {
|
||||
}
|
||||
}
|
||||
let mut state = task.state.write().unwrap();
|
||||
if is_dirty {
|
||||
state.dirty_time_windows.set_dirty();
|
||||
}
|
||||
let flow_id_label = task.config.flow_id.to_string();
|
||||
for timestamp in all_dirty_windows {
|
||||
state.dirty_time_windows.add_window(timestamp, None);
|
||||
@@ -269,9 +279,12 @@ impl BatchingEngine {
|
||||
let handle: JoinHandle<Result<(), Error>> = tokio::spawn(async move {
|
||||
let src_table_names = &task.config.source_table_names;
|
||||
|
||||
let mut is_dirty = false;
|
||||
|
||||
for src_table_name in src_table_names {
|
||||
if let Some(entry) = group_by_table_name.get(src_table_name) {
|
||||
let Some(expr) = &task.config.time_window_expr else {
|
||||
is_dirty = true;
|
||||
continue;
|
||||
};
|
||||
let involved_time_windows = expr.handle_rows(entry.clone()).await?;
|
||||
@@ -281,6 +294,10 @@ impl BatchingEngine {
|
||||
.add_lower_bounds(involved_time_windows.into_iter());
|
||||
}
|
||||
}
|
||||
if is_dirty {
|
||||
task.state.write().unwrap().dirty_time_windows.set_dirty();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
});
|
||||
handles.push(handle);
|
||||
@@ -370,13 +387,12 @@ impl BatchingEngine {
|
||||
}
|
||||
})?;
|
||||
let query_ctx = Arc::new(query_ctx);
|
||||
let is_tql = is_tql(query_ctx.sql_dialect(), &sql)
|
||||
.map_err(BoxedError::new)
|
||||
.context(CreateFlowSnafu { sql: &sql })?;
|
||||
|
||||
// optionally set a eval interval for the flow
|
||||
if eval_interval.is_none()
|
||||
&& is_tql(query_ctx.sql_dialect(), &sql)
|
||||
.map_err(BoxedError::new)
|
||||
.context(CreateFlowSnafu { sql: &sql })?
|
||||
{
|
||||
if eval_interval.is_none() && is_tql {
|
||||
InvalidQuerySnafu {
|
||||
reason: "TQL query requires EVAL INTERVAL to be set".to_string(),
|
||||
}
|
||||
@@ -418,6 +434,11 @@ impl BatchingEngine {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
|
||||
let plan = sql_to_df_plan(query_ctx.clone(), self.query_engine.clone(), &sql, true).await?;
|
||||
|
||||
if is_tql {
|
||||
self.check_is_tql_table(&plan, &query_ctx).await?;
|
||||
}
|
||||
|
||||
let (column_name, time_window_expr, _, df_schema) = find_time_window_expr(
|
||||
&plan,
|
||||
self.query_engine.engine_state().catalog_manager().clone(),
|
||||
@@ -484,6 +505,131 @@ impl BatchingEngine {
|
||||
Ok(Some(flow_id))
|
||||
}
|
||||
|
||||
async fn check_is_tql_table(
|
||||
&self,
|
||||
query: &LogicalPlan,
|
||||
query_ctx: &QueryContext,
|
||||
) -> Result<(), Error> {
|
||||
struct CollectTableRef {
|
||||
table_refs: HashSet<datafusion_common::TableReference>,
|
||||
}
|
||||
|
||||
impl TreeNodeVisitor<'_> for CollectTableRef {
|
||||
type Node = LogicalPlan;
|
||||
fn f_down(
|
||||
&mut self,
|
||||
node: &Self::Node,
|
||||
) -> datafusion_common::Result<TreeNodeRecursion> {
|
||||
if let LogicalPlan::TableScan(scan) = node {
|
||||
self.table_refs.insert(scan.table_name.clone());
|
||||
}
|
||||
Ok(TreeNodeRecursion::Continue)
|
||||
}
|
||||
}
|
||||
let mut table_refs = CollectTableRef {
|
||||
table_refs: HashSet::new(),
|
||||
};
|
||||
query
|
||||
.visit_with_subqueries(&mut table_refs)
|
||||
.context(DatafusionSnafu {
|
||||
context: "Checking if all source tables are TQL tables",
|
||||
})?;
|
||||
|
||||
let default_catalog = query_ctx.current_catalog();
|
||||
let default_schema = query_ctx.current_schema();
|
||||
let default_schema = &default_schema;
|
||||
|
||||
for table_ref in table_refs.table_refs {
|
||||
let table_ref = match &table_ref {
|
||||
datafusion_common::TableReference::Bare { table } => {
|
||||
TableReference::full(default_catalog, default_schema, table)
|
||||
}
|
||||
datafusion_common::TableReference::Partial { schema, table } => {
|
||||
TableReference::full(default_catalog, schema, table)
|
||||
}
|
||||
datafusion_common::TableReference::Full {
|
||||
catalog,
|
||||
schema,
|
||||
table,
|
||||
} => TableReference::full(catalog, schema, table),
|
||||
};
|
||||
|
||||
let table_id = self
|
||||
.table_meta
|
||||
.table_name_manager()
|
||||
.get(table_ref.into())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?
|
||||
.with_context(|| UnexpectedSnafu {
|
||||
reason: format!("Failed to get table id for table: {}", table_ref),
|
||||
})?
|
||||
.table_id();
|
||||
let table_info =
|
||||
get_table_info(self.table_meta.table_info_manager(), &table_id).await?;
|
||||
// first check if it's only one f64 value column
|
||||
let value_cols = table_info
|
||||
.table_info
|
||||
.meta
|
||||
.schema
|
||||
.column_schemas
|
||||
.iter()
|
||||
.filter(|col| col.data_type == ConcreteDataType::float64_datatype())
|
||||
.collect::<Vec<_>>();
|
||||
ensure!(
|
||||
value_cols.len() == 1,
|
||||
InvalidQuerySnafu {
|
||||
reason: format!(
|
||||
"TQL query only supports one f64 value column, table `{}`(id={}) has {} f64 value columns, columns are: {:?}",
|
||||
table_ref,
|
||||
table_id,
|
||||
value_cols.len(),
|
||||
value_cols
|
||||
),
|
||||
}
|
||||
);
|
||||
// TODO(discord9): do need to check rest columns is string and is tag column?
|
||||
let pk_idxs = table_info
|
||||
.table_info
|
||||
.meta
|
||||
.primary_key_indices
|
||||
.iter()
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
for (idx, col) in table_info
|
||||
.table_info
|
||||
.meta
|
||||
.schema
|
||||
.column_schemas
|
||||
.iter()
|
||||
.enumerate()
|
||||
{
|
||||
// three cases:
|
||||
// 1. val column
|
||||
// 2. timestamp column
|
||||
// 3. tag column (string)
|
||||
|
||||
let is_pk: bool = pk_idxs.contains(&&idx);
|
||||
|
||||
ensure!(
|
||||
col.data_type == ConcreteDataType::float64_datatype()
|
||||
|| col.data_type.is_timestamp()
|
||||
|| (col.data_type == ConcreteDataType::string_datatype() && is_pk),
|
||||
InvalidQuerySnafu {
|
||||
reason: format!(
|
||||
"TQL query only supports f64 value column, timestamp column and string tag columns, table `{}`(id={}) has column `{}` with type {:?} which is not supported",
|
||||
table_ref,
|
||||
table_id,
|
||||
col.name,
|
||||
col.data_type
|
||||
),
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
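
// check_is_tql_table above collects every scanned table and then requires the
// Prometheus-style shape: exactly one f64 value column, and every other column
// either a timestamp or a string column that is part of the primary key (a tag).
// A pure-std sketch of that column check; `ColKind`/`ColumnSketch` are
// hypothetical stand-ins for the real ColumnSchema/ConcreteDataType types.
use std::collections::HashSet;

#[derive(Debug, PartialEq)]
enum ColKind {
    Float64,
    Timestamp,
    String,
}

struct ColumnSketch {
    name: &'static str,
    kind: ColKind,
}

fn check_tql_shape(columns: &[ColumnSketch], pk_indices: &[usize]) -> Result<(), String> {
    let pk: HashSet<_> = pk_indices.iter().copied().collect();
    let value_cols = columns.iter().filter(|c| c.kind == ColKind::Float64).count();
    if value_cols != 1 {
        return Err(format!("expected exactly one f64 value column, found {value_cols}"));
    }
    for (idx, col) in columns.iter().enumerate() {
        let ok = match col.kind {
            ColKind::Float64 | ColKind::Timestamp => true,
            // String columns are only allowed as tags, i.e. primary-key members.
            ColKind::String => pk.contains(&idx),
        };
        if !ok {
            return Err(format!("column `{}` is not a value, timestamp or tag column", col.name));
        }
    }
    Ok(())
}

fn main() {
    let cols = [
        ColumnSketch { name: "host", kind: ColKind::String },
        ColumnSketch { name: "ts", kind: ColKind::Timestamp },
        ColumnSketch { name: "val", kind: ColKind::Float64 },
    ];
    assert!(check_tql_shape(&cols, &[0]).is_ok());
    assert!(check_tql_shape(&cols, &[]).is_err()); // `host` is a bare string, not a tag
}
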
pub async fn remove_flow_inner(&self, flow_id: FlowId) -> Result<(), Error> {
|
||||
if self.tasks.write().await.remove(&flow_id).is_none() {
|
||||
warn!("Flow {flow_id} not found in tasks");
|
||||
|
||||
@@ -203,11 +203,21 @@ impl DirtyTimeWindows {
self.windows.clear();
}

/// Set windows to be dirty, only useful for full aggr without time window
/// to mark some new data is inserted
pub fn set_dirty(&mut self) {
self.windows.insert(Timestamp::new_second(0), None);
}

/// Number of dirty windows.
pub fn len(&self) -> usize {
self.windows.len()
}

pub fn is_empty(&self) -> bool {
self.windows.is_empty()
}

/// Get the effective count of time windows, which is the number of time windows that can be
/// used for query, compute from total time window range divided by `window_size`.
pub fn effective_count(&self, window_size: &Duration) -> usize {

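
// `set_dirty` above marks a sentinel window at timestamp 0 so that a flow with
// no time-window expression still records "new data arrived"; `len`/`is_empty`
// then drive the "anything to recompute?" decision. A minimal sketch keyed by
// i64 seconds instead of the real Timestamp type.
use std::collections::BTreeMap;

#[derive(Default)]
struct DirtyWindowsSketch {
    // window start -> optional window end
    windows: BTreeMap<i64, Option<i64>>,
}

impl DirtyWindowsSketch {
    fn add_window(&mut self, start: i64, end: Option<i64>) {
        self.windows.insert(start, end);
    }
    /// Sentinel entry used when the flow has no time-window expression.
    fn set_dirty(&mut self) {
        self.windows.insert(0, None);
    }
    fn is_empty(&self) -> bool {
        self.windows.is_empty()
    }
    fn clean(&mut self) {
        self.windows.clear();
    }
}

fn main() {
    let mut dirty = DirtyWindowsSketch::default();
    assert!(dirty.is_empty());
    dirty.set_dirty();
    dirty.add_window(1_700_000_000, None);
    assert!(!dirty.is_empty());
    dirty.clean();
    assert!(dirty.is_empty());
}
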
@@ -48,8 +48,8 @@ use crate::batching_mode::frontend_client::FrontendClient;
|
||||
use crate::batching_mode::state::{FilterExprInfo, TaskState};
|
||||
use crate::batching_mode::time_window::TimeWindowExpr;
|
||||
use crate::batching_mode::utils::{
|
||||
get_table_info_df_schema, sql_to_df_plan, AddAutoColumnRewriter, AddFilterRewriter,
|
||||
FindGroupByFinalName,
|
||||
gen_plan_with_matching_schema, get_table_info_df_schema, sql_to_df_plan, AddFilterRewriter,
|
||||
ColumnMatcherRewriter, FindGroupByFinalName,
|
||||
};
|
||||
use crate::batching_mode::BatchingModeOptions;
|
||||
use crate::df_optimizer::apply_df_optimizer;
|
||||
@@ -618,42 +618,63 @@ impl BatchingTask {
|
||||
.map(|expr| expr.eval(low_bound))
|
||||
.transpose()?;
|
||||
|
||||
let (Some((Some(l), Some(u))), QueryType::Sql) =
|
||||
(expire_time_window_bound, &self.config.query_type)
|
||||
else {
|
||||
// either no time window or not a sql query, then just use the original query
|
||||
// use sink_table_meta to add to query the `update_at` and `__ts_placeholder` column's value too for compatibility reason
|
||||
debug!(
|
||||
"Flow id = {:?}, can't get window size: precise_lower_bound={expire_time_window_bound:?}, using the same query", self.config.flow_id
|
||||
);
|
||||
// clean dirty time window too, this could be from create flow's check_execute
|
||||
self.state.write().unwrap().dirty_time_windows.clean();
|
||||
let (expire_lower_bound, expire_upper_bound) =
|
||||
match (expire_time_window_bound, &self.config.query_type) {
|
||||
(Some((Some(l), Some(u))), QueryType::Sql) => (l, u),
|
||||
(None, QueryType::Sql) => {
|
||||
// if it's sql query and no time window lower/upper bound is found, just return the original query(with auto columns)
|
||||
// use sink_table_meta to add to query the `update_at` and `__ts_placeholder` column's value too for compatibility reason
|
||||
debug!(
|
||||
"Flow id = {:?}, no time window, using the same query",
|
||||
self.config.flow_id
|
||||
);
|
||||
// clean dirty time window too, this could be from create flow's check_execute
|
||||
let is_dirty = !self.state.read().unwrap().dirty_time_windows.is_empty();
|
||||
self.state.write().unwrap().dirty_time_windows.clean();
|
||||
|
||||
// TODO(discord9): not add auto column for tql query?
|
||||
let mut add_auto_column = AddAutoColumnRewriter::new(sink_table_schema.clone());
|
||||
if !is_dirty {
|
||||
// no dirty data, hence no need to update
|
||||
debug!("Flow id={:?}, no new data, not update", self.config.flow_id);
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let plan = sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.config.query, false)
|
||||
.await?;
|
||||
let plan = gen_plan_with_matching_schema(
|
||||
&self.config.query,
|
||||
query_ctx,
|
||||
engine,
|
||||
sink_table_schema.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let plan = plan
|
||||
.clone()
|
||||
.rewrite(&mut add_auto_column)
|
||||
.with_context(|_| DatafusionSnafu {
|
||||
context: format!("Failed to rewrite plan:\n {}\n", plan),
|
||||
})?
|
||||
.data;
|
||||
return Ok(Some(PlanInfo { plan, filter: None }));
|
||||
}
|
||||
_ => {
|
||||
// clean for tql have no use for time window
|
||||
self.state.write().unwrap().dirty_time_windows.clean();
|
||||
|
||||
// since no time window lower/upper bound is found, just return the original query(with auto columns)
|
||||
return Ok(Some(PlanInfo { plan, filter: None }));
|
||||
};
|
||||
let plan = gen_plan_with_matching_schema(
|
||||
&self.config.query,
|
||||
query_ctx,
|
||||
engine,
|
||||
sink_table_schema.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
return Ok(Some(PlanInfo { plan, filter: None }));
|
||||
}
|
||||
};
|
||||
|
||||
debug!(
|
||||
"Flow id = {:?}, found time window: precise_lower_bound={:?}, precise_upper_bound={:?} with dirty time windows: {:?}",
|
||||
self.config.flow_id, l, u, self.state.read().unwrap().dirty_time_windows
|
||||
self.config.flow_id, expire_lower_bound, expire_upper_bound, self.state.read().unwrap().dirty_time_windows
|
||||
);
|
||||
let window_size = u.sub(&l).with_context(|| UnexpectedSnafu {
|
||||
reason: format!("Can't get window size from {u:?} - {l:?}"),
|
||||
})?;
|
||||
let window_size = expire_upper_bound
|
||||
.sub(&expire_lower_bound)
|
||||
.with_context(|| UnexpectedSnafu {
|
||||
reason: format!(
|
||||
"Can't get window size from {expire_upper_bound:?} - {expire_lower_bound:?}"
|
||||
),
|
||||
})?;
|
||||
let col_name = self
|
||||
.config
|
||||
.time_window_expr
|
||||
@@ -673,7 +694,7 @@ impl BatchingTask {
|
||||
.dirty_time_windows
|
||||
.gen_filter_exprs(
|
||||
&col_name,
|
||||
Some(l),
|
||||
Some(expire_lower_bound),
|
||||
window_size,
|
||||
max_window_cnt
|
||||
.unwrap_or(self.config.batch_opts.experimental_max_filter_num_per_query),
|
||||
@@ -701,7 +722,7 @@ impl BatchingTask {
|
||||
};
|
||||
|
||||
let mut add_filter = AddFilterRewriter::new(expr.expr.clone());
|
||||
let mut add_auto_column = AddAutoColumnRewriter::new(sink_table_schema.clone());
|
||||
let mut add_auto_column = ColumnMatcherRewriter::new(sink_table_schema.clone());
|
||||
|
||||
let plan =
|
||||
sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.config.query, false).await?;
|
||||
@@ -714,7 +735,7 @@ impl BatchingTask {
|
||||
})?
|
||||
.data;
|
||||
// only apply optimize after complex rewrite is done
|
||||
let new_plan = apply_df_optimizer(rewrite).await?;
|
||||
let new_plan = apply_df_optimizer(rewrite, &query_ctx).await?;
|
||||
|
||||
let info = PlanInfo {
|
||||
plan: new_plan.clone(),
|
||||
@@ -732,7 +753,25 @@ fn create_table_with_expr(
|
||||
sink_table_name: &[String; 3],
|
||||
query_type: &QueryType,
|
||||
) -> Result<CreateTableExpr, Error> {
|
||||
let (first_time_stamp, primary_keys) = build_primary_key_constraint(plan)?;
|
||||
let table_def = match query_type {
|
||||
&QueryType::Sql => {
|
||||
if let Some(def) = build_pk_from_aggr(plan)? {
|
||||
def
|
||||
} else {
|
||||
build_by_sql_schema(plan)?
|
||||
}
|
||||
}
|
||||
QueryType::Tql => {
|
||||
// first try build from aggr, then from tql schema because tql query might not have aggr node
|
||||
if let Some(table_def) = build_pk_from_aggr(plan)? {
|
||||
table_def
|
||||
} else {
|
||||
build_by_tql_schema(plan)?
|
||||
}
|
||||
}
|
||||
};
|
||||
let first_time_stamp = table_def.ts_col;
|
||||
let primary_keys = table_def.pks;
|
||||
|
||||
let mut column_schemas = Vec::new();
|
||||
for field in plan.schema().fields() {
|
||||
@@ -755,7 +794,7 @@ fn create_table_with_expr(
|
||||
let is_val_column = !is_tag_column && first_time_stamp.as_ref() != Some(name);
|
||||
if is_val_column {
|
||||
let col_schema =
|
||||
ColumnSchema::new("val", ConcreteDataType::float64_datatype(), true);
|
||||
ColumnSchema::new(name, ConcreteDataType::float64_datatype(), true);
|
||||
column_schemas.push(col_schema);
|
||||
} else if is_tag_column {
|
||||
let col_schema =
|
||||
@@ -809,15 +848,63 @@ fn create_table_with_expr(
|
||||
})
|
||||
}
|
||||
|
||||
/// simply build by schema, return first timestamp column and no primary key
|
||||
fn build_by_sql_schema(plan: &LogicalPlan) -> Result<TableDef, Error> {
|
||||
let first_time_stamp = plan.schema().fields().iter().find_map(|f| {
|
||||
if ConcreteDataType::from_arrow_type(f.data_type()).is_timestamp() {
|
||||
Some(f.name().clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
Ok(TableDef {
|
||||
ts_col: first_time_stamp,
|
||||
pks: vec![],
|
||||
})
|
||||
}
|
||||
|
||||
/// Return first timestamp column found in output schema and all string columns
|
||||
fn build_by_tql_schema(plan: &LogicalPlan) -> Result<TableDef, Error> {
|
||||
let first_time_stamp = plan.schema().fields().iter().find_map(|f| {
|
||||
if ConcreteDataType::from_arrow_type(f.data_type()).is_timestamp() {
|
||||
Some(f.name().clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
let string_columns = plan
|
||||
.schema()
|
||||
.fields()
|
||||
.iter()
|
||||
.filter_map(|f| {
|
||||
if ConcreteDataType::from_arrow_type(f.data_type()).is_string() {
|
||||
Some(f.name().clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(TableDef {
|
||||
ts_col: first_time_stamp,
|
||||
pks: string_columns,
|
||||
})
|
||||
}
|
||||
|
||||
struct TableDef {
|
||||
ts_col: Option<String>,
|
||||
pks: Vec<String>,
|
||||
}
|
||||
|
||||
/// Return first timestamp column which is in group by clause and other columns which are also in group by clause
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Option<String>` - first timestamp column which is in group by clause
|
||||
/// * `Vec<String>` - other columns which are also in group by clause
|
||||
fn build_primary_key_constraint(
|
||||
plan: &LogicalPlan,
|
||||
) -> Result<(Option<String>, Vec<String>), Error> {
|
||||
///
|
||||
/// if no aggregation found, return None
|
||||
fn build_pk_from_aggr(plan: &LogicalPlan) -> Result<Option<TableDef>, Error> {
|
||||
let fields = plan.schema().fields();
|
||||
let mut pk_names = FindGroupByFinalName::default();
|
||||
|
||||
@@ -827,13 +914,18 @@ fn build_primary_key_constraint(
|
||||
})?;
|
||||
|
||||
// if no group by clause, return empty with first timestamp column found in output schema
|
||||
let pk_final_names = pk_names.get_group_expr_names().unwrap_or_default();
|
||||
let Some(pk_final_names) = pk_names.get_group_expr_names() else {
|
||||
return Ok(None);
|
||||
};
|
||||
if pk_final_names.is_empty() {
|
||||
let first_ts_col = fields
|
||||
.iter()
|
||||
.find(|f| ConcreteDataType::from_arrow_type(f.data_type()).is_timestamp())
|
||||
.map(|f| f.name().clone());
|
||||
return Ok((first_ts_col, Vec::new()));
|
||||
return Ok(Some(TableDef {
|
||||
ts_col: first_ts_col,
|
||||
pks: vec![],
|
||||
}));
|
||||
}
|
||||
|
||||
let all_pk_cols: Vec<_> = fields
|
||||
@@ -855,7 +947,10 @@ fn build_primary_key_constraint(
|
||||
.filter(|col| first_time_stamp != Some(col.to_string()))
|
||||
.collect();
|
||||
|
||||
Ok((first_time_stamp, all_pk_cols))
|
||||
Ok(Some(TableDef {
|
||||
ts_col: first_time_stamp,
|
||||
pks: all_pk_cols,
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -24,7 +24,7 @@ use datafusion::error::Result as DfResult;
|
||||
use datafusion::logical_expr::Expr;
|
||||
use datafusion::sql::unparser::Unparser;
|
||||
use datafusion_common::tree_node::{
|
||||
Transformed, TreeNodeRecursion, TreeNodeRewriter, TreeNodeVisitor,
|
||||
Transformed, TreeNode as _, TreeNodeRecursion, TreeNodeRewriter, TreeNodeVisitor,
|
||||
};
|
||||
use datafusion_common::{DFSchema, DataFusionError, ScalarValue};
|
||||
use datafusion_expr::{Distinct, LogicalPlan, Projection};
|
||||
@@ -122,19 +122,40 @@ pub async fn sql_to_df_plan(
|
||||
};
|
||||
let plan = engine
|
||||
.planner()
|
||||
.plan(&query_stmt, query_ctx)
|
||||
.plan(&query_stmt, query_ctx.clone())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
|
||||
let plan = if optimize {
|
||||
apply_df_optimizer(plan).await?
|
||||
apply_df_optimizer(plan, &query_ctx).await?
|
||||
} else {
|
||||
plan
|
||||
};
|
||||
Ok(plan)
|
||||
}
|
||||
|
||||
/// Generate a plan that matches the schema of the sink table
|
||||
/// from given sql by alias and adding auto columns
|
||||
pub(crate) async fn gen_plan_with_matching_schema(
|
||||
sql: &str,
|
||||
query_ctx: QueryContextRef,
|
||||
engine: QueryEngineRef,
|
||||
sink_table_schema: SchemaRef,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
let plan = sql_to_df_plan(query_ctx.clone(), engine.clone(), sql, false).await?;
|
||||
|
||||
let mut add_auto_column = ColumnMatcherRewriter::new(sink_table_schema);
|
||||
let plan = plan
|
||||
.clone()
|
||||
.rewrite(&mut add_auto_column)
|
||||
.with_context(|_| DatafusionSnafu {
|
||||
context: format!("Failed to rewrite plan:\n {}\n", plan),
|
||||
})?
|
||||
.data;
|
||||
Ok(plan)
|
||||
}
|
||||
|
||||
pub fn df_plan_to_sql(plan: &LogicalPlan) -> Result<String, Error> {
|
||||
/// A dialect that forces identifiers to be quoted when have uppercase
|
||||
struct ForceQuoteIdentifiers;
|
||||
@@ -239,19 +260,19 @@ impl TreeNodeVisitor<'_> for FindGroupByFinalName {
|
||||
}
|
||||
}
|
||||
|
||||
/// Add to the final select columns like `update_at`
|
||||
/// Optionally add to the final select columns like `update_at` if the sink table has such column
|
||||
/// (which doesn't necessary need to have exact name just need to be a extra timestamp column)
|
||||
/// and `__ts_placeholder`(this column need to have exact this name and be a timestamp)
|
||||
/// with values like `now()` and `0`
|
||||
///
|
||||
/// it also give existing columns alias to column in sink table if needed
|
||||
#[derive(Debug)]
|
||||
pub struct AddAutoColumnRewriter {
|
||||
pub struct ColumnMatcherRewriter {
|
||||
pub schema: SchemaRef,
|
||||
pub is_rewritten: bool,
|
||||
}
|
||||
|
||||
impl AddAutoColumnRewriter {
|
||||
impl ColumnMatcherRewriter {
|
||||
pub fn new(schema: SchemaRef) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
@@ -348,7 +369,7 @@ impl AddAutoColumnRewriter {
|
||||
}
|
||||
}
|
||||
|
||||
impl TreeNodeRewriter for AddAutoColumnRewriter {
|
||||
impl TreeNodeRewriter for ColumnMatcherRewriter {
|
||||
type Node = LogicalPlan;
|
||||
fn f_down(&mut self, mut node: Self::Node) -> DfResult<Transformed<Self::Node>> {
|
||||
if self.is_rewritten {
|
||||
@@ -696,7 +717,7 @@ mod test {
|
||||
let ctx = QueryContext::arc();
|
||||
for (before, after, column_schemas) in testcases {
|
||||
let schema = Arc::new(Schema::new(column_schemas));
|
||||
let mut add_auto_column_rewriter = AddAutoColumnRewriter::new(schema);
|
||||
let mut add_auto_column_rewriter = ColumnMatcherRewriter::new(schema);
|
||||
|
||||
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), before, false)
|
||||
.await
|
||||
|
||||
@@ -44,6 +44,7 @@ use query::optimizer::count_wildcard::CountWildcardToTimeIndexRule;
|
||||
use query::parser::QueryLanguageParser;
|
||||
use query::query_engine::DefaultSerializer;
|
||||
use query::QueryEngine;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::ResultExt;
|
||||
/// note here we are using the `substrait_proto_df` crate from the `substrait` module and
|
||||
/// rename it to `substrait_proto`
|
||||
@@ -57,8 +58,9 @@ use crate::plan::TypedPlan;
|
||||
// TODO(discord9): use `Analyzer` to manage rules if more `AnalyzerRule` is needed
|
||||
pub async fn apply_df_optimizer(
|
||||
plan: datafusion_expr::LogicalPlan,
|
||||
query_ctx: &QueryContextRef,
|
||||
) -> Result<datafusion_expr::LogicalPlan, Error> {
|
||||
let cfg = ConfigOptions::new();
|
||||
let cfg = query_ctx.create_config_options();
|
||||
let analyzer = Analyzer::with_rules(vec![
|
||||
Arc::new(CountWildcardToTimeIndexRule),
|
||||
Arc::new(AvgExpandRule),
|
||||
@@ -107,12 +109,12 @@ pub async fn sql_to_flow_plan(
|
||||
.context(ExternalSnafu)?;
|
||||
let plan = engine
|
||||
.planner()
|
||||
.plan(&stmt, query_ctx)
|
||||
.plan(&stmt, query_ctx.clone())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
|
||||
let opted_plan = apply_df_optimizer(plan).await?;
|
||||
let opted_plan = apply_df_optimizer(plan, &query_ctx).await?;
|
||||
|
||||
// TODO(discord9): add df optimization
|
||||
let sub_plan = DFLogicalSubstraitConvertor {}
|
||||
|
||||
@@ -172,7 +172,9 @@ pub async fn sql_to_substrait(engine: Arc<dyn QueryEngine>, sql: &str) -> proto:
|
||||
.plan(&stmt, QueryContext::arc())
|
||||
.await
|
||||
.unwrap();
|
||||
let plan = apply_df_optimizer(plan).await.unwrap();
|
||||
let plan = apply_df_optimizer(plan, &QueryContext::arc())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// encode then decode so to rely on the impl of conversion from logical plan to substrait plan
|
||||
let bytes = DFLogicalSubstraitConvertor {}
|
||||
|
||||
@@ -293,7 +293,9 @@ mod test {
|
||||
.plan(&stmt, QueryContext::arc())
|
||||
.await
|
||||
.unwrap();
|
||||
let plan = apply_df_optimizer(plan).await.unwrap();
|
||||
let plan = apply_df_optimizer(plan, &QueryContext::arc())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// encode then decode so to rely on the impl of conversion from logical plan to substrait plan
|
||||
let bytes = DFLogicalSubstraitConvertor {}
|
||||
@@ -315,7 +317,7 @@ mod test {
|
||||
.plan(&stmt, QueryContext::arc())
|
||||
.await
|
||||
.unwrap();
|
||||
let plan = apply_df_optimizer(plan).await;
|
||||
let plan = apply_df_optimizer(plan, &QueryContext::arc()).await;
|
||||
|
||||
assert!(plan.is_err());
|
||||
}
|
||||
|
||||
@@ -376,34 +376,16 @@ impl Instance {
|
||||
ctx: QueryContextRef,
|
||||
) -> server_error::Result<bool> {
|
||||
let db_string = ctx.get_db_string();
|
||||
// fast cache check
|
||||
let cache = self
|
||||
.otlp_metrics_table_legacy_cache
|
||||
.entry(db_string)
|
||||
.entry(db_string.clone())
|
||||
.or_default();
|
||||
|
||||
// check cache
|
||||
let hit_cache = names
|
||||
.iter()
|
||||
.filter_map(|name| cache.get(*name))
|
||||
.collect::<Vec<_>>();
|
||||
if !hit_cache.is_empty() {
|
||||
let hit_legacy = hit_cache.iter().any(|en| *en.value());
|
||||
let hit_prom = hit_cache.iter().any(|en| !*en.value());
|
||||
|
||||
// hit but have true and false, means both legacy and new mode are used
|
||||
// we cannot handle this case, so return error
|
||||
// add doc links in err msg later
|
||||
ensure!(!(hit_legacy && hit_prom), OtlpMetricModeIncompatibleSnafu);
|
||||
|
||||
let flag = hit_legacy;
|
||||
// set cache for all names
|
||||
names.iter().for_each(|name| {
|
||||
if !cache.contains_key(*name) {
|
||||
cache.insert(name.to_string(), flag);
|
||||
}
|
||||
});
|
||||
if let Some(flag) = fast_legacy_check(&cache, names)? {
|
||||
return Ok(flag);
|
||||
}
|
||||
// release cache reference to avoid lock contention
|
||||
drop(cache);
|
||||
|
||||
let catalog = ctx.current_catalog();
|
||||
let schema = ctx.current_schema();
|
||||
@@ -430,7 +412,10 @@ impl Instance {
|
||||
|
||||
// means no existing table is found, use new mode
|
||||
if table_ids.is_empty() {
|
||||
// set cache
|
||||
let cache = self
|
||||
.otlp_metrics_table_legacy_cache
|
||||
.entry(db_string)
|
||||
.or_default();
|
||||
names.iter().for_each(|name| {
|
||||
cache.insert(name.to_string(), false);
|
||||
});
|
||||
@@ -455,6 +440,10 @@ impl Instance {
|
||||
.unwrap_or(&OTLP_LEGACY_DEFAULT_VALUE)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let cache = self
|
||||
.otlp_metrics_table_legacy_cache
|
||||
.entry(db_string)
|
||||
.or_default();
|
||||
if !options.is_empty() {
|
||||
// check value consistency
|
||||
let has_prom = options.iter().any(|opt| *opt == OTLP_METRIC_COMPAT_PROM);
|
||||
@@ -477,6 +466,39 @@ impl Instance {
|
||||
}
|
||||
}
|
||||
|
||||
fn fast_legacy_check(
|
||||
cache: &DashMap<String, bool>,
|
||||
names: &[&String],
|
||||
) -> server_error::Result<Option<bool>> {
|
||||
let hit_cache = names
|
||||
.iter()
|
||||
.filter_map(|name| cache.get(*name))
|
||||
.collect::<Vec<_>>();
|
||||
if !hit_cache.is_empty() {
|
||||
let hit_legacy = hit_cache.iter().any(|en| *en.value());
|
||||
let hit_prom = hit_cache.iter().any(|en| !*en.value());
|
||||
|
||||
// hit but have true and false, means both legacy and new mode are used
|
||||
// we cannot handle this case, so return error
|
||||
// add doc links in err msg later
|
||||
ensure!(!(hit_legacy && hit_prom), OtlpMetricModeIncompatibleSnafu);
|
||||
|
||||
let flag = hit_legacy;
|
||||
// drop hit_cache to release references before inserting to avoid deadlock
|
||||
drop(hit_cache);
|
||||
|
||||
// set cache for all names
|
||||
names.iter().for_each(|name| {
|
||||
if !cache.contains_key(*name) {
|
||||
cache.insert(name.to_string(), flag);
|
||||
}
|
||||
});
|
||||
Ok(Some(flag))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
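
// fast_legacy_check above collects `Ref` guards from the DashMap and explicitly
// drops them before inserting, because holding a read guard on a shard while
// writing to the same map can deadlock. A minimal sketch of that pattern,
// assuming the `dashmap` crate as a dependency.
use dashmap::DashMap;

fn mark_like_cached(cache: &DashMap<String, bool>, names: &[&str]) -> Option<bool> {
    let hits: Vec<_> = names.iter().filter_map(|name| cache.get(*name)).collect();
    if hits.is_empty() {
        return None;
    }
    let flag = hits.iter().any(|entry| *entry.value());
    // Release all read guards before touching the map again.
    drop(hits);
    for name in names {
        if !cache.contains_key(*name) {
            cache.insert((*name).to_string(), flag);
        }
    }
    Some(flag)
}

fn main() {
    let cache = DashMap::new();
    cache.insert("metric1".to_string(), true);
    assert_eq!(mark_like_cached(&cache, &["metric1", "metric4"]), Some(true));
    assert!(*cache.get("metric4").unwrap().value());
    assert_eq!(mark_like_cached(&cache, &["metric5"]), None);
}
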
/// If the relevant variables are set, the timeout is enforced for all PostgreSQL statements.
|
||||
/// For MySQL, it applies only to read-only statements.
|
||||
fn derive_timeout(stmt: &Statement, query_ctx: &QueryContextRef) -> Option<Duration> {
|
||||
@@ -1039,6 +1061,10 @@ fn should_capture_statement(stmt: Option<&Statement>) -> bool {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Barrier};
|
||||
use std::thread;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use common_base::Plugins;
|
||||
use query::query_engine::options::QueryOptions;
|
||||
@@ -1048,6 +1074,122 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_fast_legacy_check_deadlock_prevention() {
|
||||
// Create a DashMap to simulate the cache
|
||||
let cache = DashMap::new();
|
||||
|
||||
// Pre-populate cache with some entries
|
||||
cache.insert("metric1".to_string(), true); // legacy mode
|
||||
cache.insert("metric2".to_string(), false); // prom mode
|
||||
cache.insert("metric3".to_string(), true); // legacy mode
|
||||
|
||||
// Test case 1: Normal operation with cache hits
|
||||
let metric1 = "metric1".to_string();
|
||||
let metric4 = "metric4".to_string();
|
||||
let names1 = vec![&metric1, &metric4];
|
||||
let result = fast_legacy_check(&cache, &names1);
|
||||
assert!(result.is_ok());
|
||||
assert_eq!(result.unwrap(), Some(true)); // should return legacy mode
|
||||
|
||||
// Verify that metric4 was added to cache
|
||||
assert!(cache.contains_key("metric4"));
|
||||
assert!(*cache.get("metric4").unwrap().value());
|
||||
|
||||
// Test case 2: No cache hits
|
||||
let metric5 = "metric5".to_string();
|
||||
let metric6 = "metric6".to_string();
|
||||
let names2 = vec![&metric5, &metric6];
|
||||
let result = fast_legacy_check(&cache, &names2);
|
||||
assert!(result.is_ok());
|
||||
assert_eq!(result.unwrap(), None); // should return None as no cache hits
|
||||
|
||||
// Test case 3: Incompatible modes should return error
|
||||
let cache_incompatible = DashMap::new();
|
||||
cache_incompatible.insert("metric1".to_string(), true); // legacy
|
||||
cache_incompatible.insert("metric2".to_string(), false); // prom
|
||||
let metric1_test = "metric1".to_string();
|
||||
let metric2_test = "metric2".to_string();
|
||||
let names3 = vec![&metric1_test, &metric2_test];
|
||||
let result = fast_legacy_check(&cache_incompatible, &names3);
|
||||
assert!(result.is_err()); // should error due to incompatible modes
|
||||
|
||||
// Test case 4: Intensive concurrent access to test deadlock prevention
|
||||
// This test specifically targets the scenario where multiple threads
|
||||
// access the same cache entries simultaneously
|
||||
let cache_concurrent = Arc::new(DashMap::new());
|
||||
cache_concurrent.insert("shared_metric".to_string(), true);
|
||||
|
||||
let num_threads = 8;
|
||||
let operations_per_thread = 100;
|
||||
let barrier = Arc::new(Barrier::new(num_threads));
|
||||
let success_flag = Arc::new(AtomicBool::new(true));
|
||||
|
||||
let handles: Vec<_> = (0..num_threads)
|
||||
.map(|thread_id| {
|
||||
let cache_clone = Arc::clone(&cache_concurrent);
|
||||
let barrier_clone = Arc::clone(&barrier);
|
||||
let success_flag_clone = Arc::clone(&success_flag);
|
||||
|
||||
thread::spawn(move || {
|
||||
// Wait for all threads to be ready
|
||||
barrier_clone.wait();
|
||||
|
||||
let start_time = Instant::now();
|
||||
for i in 0..operations_per_thread {
|
||||
// Each operation references existing cache entry and adds new ones
|
||||
let shared_metric = "shared_metric".to_string();
|
||||
let new_metric = format!("thread_{}_metric_{}", thread_id, i);
|
||||
let names = vec![&shared_metric, &new_metric];
|
||||
|
||||
match fast_legacy_check(&cache_clone, &names) {
|
||||
Ok(_) => {}
|
||||
Err(_) => {
|
||||
success_flag_clone.store(false, Ordering::Relaxed);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// If the test takes too long, it likely means deadlock
|
||||
if start_time.elapsed() > Duration::from_secs(10) {
|
||||
success_flag_clone.store(false, Ordering::Relaxed);
|
||||
return;
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Join all threads with timeout
|
||||
let start_time = Instant::now();
|
||||
for (i, handle) in handles.into_iter().enumerate() {
|
||||
let join_result = handle.join();
|
||||
|
||||
// Check if we're taking too long (potential deadlock)
|
||||
if start_time.elapsed() > Duration::from_secs(30) {
|
||||
panic!("Test timed out - possible deadlock detected!");
|
||||
}
|
||||
|
||||
if join_result.is_err() {
|
||||
panic!("Thread {} panicked during execution", i);
|
||||
}
|
||||
}
|
||||
|
||||
// Verify all operations completed successfully
|
||||
assert!(
|
||||
success_flag.load(Ordering::Relaxed),
|
||||
"Some operations failed"
|
||||
);
|
||||
|
||||
// Verify that many new entries were added (proving operations completed)
|
||||
let final_count = cache_concurrent.len();
|
||||
assert!(
|
||||
final_count > 1 + num_threads * operations_per_thread / 2,
|
||||
"Expected more cache entries, got {}",
|
||||
final_count
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_exec_validation() {
|
||||
let query_ctx = QueryContext::arc();
|
||||
|
||||
@@ -139,9 +139,6 @@ pub enum Error {
|
||||
error: rskafka::client::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to resolve Kafka broker endpoint."))]
|
||||
ResolveKafkaEndpoint { source: common_wal::error::Error },
|
||||
|
||||
#[snafu(display(
|
||||
"Failed to build a Kafka partition client, topic: {}, partition: {}",
|
||||
topic,
|
||||
@@ -343,7 +340,6 @@ impl ErrorExt for Error {
|
||||
StartWalTask { .. }
|
||||
| StopWalTask { .. }
|
||||
| IllegalState { .. }
|
||||
| ResolveKafkaEndpoint { .. }
|
||||
| NoMaxValue { .. }
|
||||
| Cast { .. }
|
||||
| EncodeJson { .. }
|
||||
|
||||
@@ -24,9 +24,7 @@ use snafu::ResultExt;
|
||||
use store_api::logstore::provider::KafkaProvider;
|
||||
use tokio::sync::{Mutex, RwLock};
|
||||
|
||||
use crate::error::{
|
||||
BuildClientSnafu, BuildPartitionClientSnafu, ResolveKafkaEndpointSnafu, Result, TlsConfigSnafu,
|
||||
};
|
||||
use crate::error::{BuildClientSnafu, BuildPartitionClientSnafu, Result, TlsConfigSnafu};
|
||||
use crate::kafka::index::{GlobalIndexCollector, NoopCollector};
|
||||
use crate::kafka::log_store::TopicStat;
|
||||
use crate::kafka::producer::{OrderedBatchProducer, OrderedBatchProducerRef};
|
||||
@@ -79,11 +77,8 @@ impl ClientManager {
|
||||
topic_stats: Arc<DashMap<Arc<KafkaProvider>, TopicStat>>,
|
||||
) -> Result<Self> {
|
||||
// Sets backoff config for the top-level kafka client and all clients constructed by it.
|
||||
let broker_endpoints = common_wal::resolve_to_ipv4(&config.connection.broker_endpoints)
|
||||
.await
|
||||
.context(ResolveKafkaEndpointSnafu)?;
|
||||
let mut builder =
|
||||
ClientBuilder::new(broker_endpoints).backoff_config(DEFAULT_BACKOFF_CONFIG);
|
||||
let mut builder = ClientBuilder::new(config.connection.broker_endpoints.clone())
|
||||
.backoff_config(DEFAULT_BACKOFF_CONFIG);
|
||||
if let Some(sasl) = &config.connection.sasl {
|
||||
builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
|
||||
};
|
||||
|
||||
@@ -302,6 +302,10 @@ impl LogStore for KafkaLogStore {
},
))
.await?;
debug!(
"Appended batch to Kafka, region_grouped_max_offset: {:?}",
region_grouped_max_offset
);

Ok(AppendBatchResponse {
last_entry_ids: region_grouped_max_offset.into_iter().collect(),
@@ -362,6 +366,17 @@ impl LogStore for KafkaLogStore {
.context(GetOffsetSnafu {
topic: &provider.topic,
})?;
let latest_offset = (end_offset as u64).saturating_sub(1);
self.topic_stats
.entry(provider.clone())
.and_modify(|stat| {
stat.latest_offset = stat.latest_offset.max(latest_offset);
})
.or_insert_with(|| TopicStat {
latest_offset,
record_size: 0,
record_num: 0,
});

let region_indexes = if let (Some(index), Some(collector)) =
(index, self.client_manager.global_index_collector())
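
// The hunk above turns Kafka's high watermark (offset of the last record plus
// one) into a "latest entry id" with `saturating_sub(1)`, and only ever moves the
// cached value forward. A std-only sketch of that bookkeeping, using HashMap
// where the real code uses a DashMap keyed by topic provider.
use std::collections::HashMap;

#[derive(Debug, Default, PartialEq)]
struct TopicStatSketch {
    latest_offset: u64,
}

fn observe_high_watermark(
    stats: &mut HashMap<String, TopicStatSketch>,
    topic: &str,
    high_watermark: i64,
) {
    // An empty topic reports a high watermark of 0; saturating_sub keeps that at 0.
    let latest_offset = (high_watermark as u64).saturating_sub(1);
    stats
        .entry(topic.to_string())
        .and_modify(|stat| stat.latest_offset = stat.latest_offset.max(latest_offset))
        .or_insert(TopicStatSketch { latest_offset });
}

fn main() {
    let mut stats = HashMap::new();
    observe_high_watermark(&mut stats, "wal_topic_0", 100);
    assert_eq!(stats["wal_topic_0"].latest_offset, 99);
    // A stale, smaller watermark never moves the cached offset backwards.
    observe_high_watermark(&mut stats, "wal_topic_0", 50);
    assert_eq!(stats["wal_topic_0"].latest_offset, 99);
}
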
@@ -550,6 +565,7 @@ mod tests {
|
||||
use futures::TryStreamExt;
|
||||
use rand::prelude::SliceRandom;
|
||||
use rand::Rng;
|
||||
use rskafka::client::partition::OffsetAt;
|
||||
use store_api::logstore::entry::{Entry, MultiplePartEntry, MultiplePartHeader, NaiveEntry};
|
||||
use store_api::logstore::provider::Provider;
|
||||
use store_api::logstore::LogStore;
|
||||
@@ -713,8 +729,16 @@ mod tests {
|
||||
.for_each(|entry| entry.set_entry_id(0));
|
||||
assert_eq!(expected_entries, actual_entries);
|
||||
}
|
||||
let high_wathermark = logstore.latest_entry_id(&provider).unwrap();
|
||||
assert_eq!(high_wathermark, 99);
|
||||
let latest_entry_id = logstore.latest_entry_id(&provider).unwrap();
|
||||
let client = logstore
|
||||
.client_manager
|
||||
.get_or_insert(provider.as_kafka_provider().unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(latest_entry_id, 99);
|
||||
// The latest offset is the offset of the last record plus one.
|
||||
let latest = client.client().get_offset(OffsetAt::Latest).await.unwrap();
|
||||
assert_eq!(latest, 100);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -112,11 +112,11 @@ mod tests {
|
||||
let current_latest_offset = topic_stats.get(&provider).unwrap().latest_offset;
|
||||
assert_eq!(current_latest_offset, 0);
|
||||
|
||||
let record = vec![record()];
|
||||
let record = vec![record(), record()];
|
||||
let region = RegionId::new(1, 1);
|
||||
producer.produce(region, record.clone()).await.unwrap();
|
||||
tokio::time::sleep(Duration::from_millis(150)).await;
|
||||
let current_latest_offset = topic_stats.get(&provider).unwrap().latest_offset;
|
||||
assert_eq!(current_latest_offset, record.len() as u64);
|
||||
assert_eq!(current_latest_offset, record.len() as u64 - 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,30 +33,34 @@ impl BackgroundProducerWorker {
|
||||
.context(error::GetOffsetSnafu {
|
||||
topic: &self.provider.topic,
|
||||
}) {
|
||||
Ok(offset) => match self.topic_stats.entry(self.provider.clone()) {
|
||||
dashmap::Entry::Occupied(mut occupied_entry) => {
|
||||
let offset = offset as u64;
|
||||
let stat = occupied_entry.get_mut();
|
||||
if stat.latest_offset < offset {
|
||||
stat.latest_offset = offset;
|
||||
Ok(highwatermark) => {
|
||||
// The highwatermark is the offset of the last record plus one.
|
||||
let offset = (highwatermark as u64).saturating_sub(1);
|
||||
|
||||
match self.topic_stats.entry(self.provider.clone()) {
|
||||
dashmap::Entry::Occupied(mut occupied_entry) => {
|
||||
let stat = occupied_entry.get_mut();
|
||||
if stat.latest_offset < offset {
|
||||
stat.latest_offset = offset;
|
||||
debug!(
|
||||
"Updated latest offset for topic {} to {}",
|
||||
self.provider.topic, offset
|
||||
);
|
||||
}
|
||||
}
|
||||
dashmap::Entry::Vacant(vacant_entry) => {
|
||||
vacant_entry.insert(TopicStat {
|
||||
latest_offset: offset,
|
||||
record_size: 0,
|
||||
record_num: 0,
|
||||
});
|
||||
debug!(
|
||||
"Updated latest offset for topic {} to {}",
|
||||
"Inserted latest offset for topic {} to {}",
|
||||
self.provider.topic, offset
|
||||
);
|
||||
}
|
||||
}
|
||||
dashmap::Entry::Vacant(vacant_entry) => {
|
||||
vacant_entry.insert(TopicStat {
|
||||
latest_offset: offset as u64,
|
||||
record_size: 0,
|
||||
record_num: 0,
|
||||
});
|
||||
debug!(
|
||||
"Inserted latest offset for topic {} to {}",
|
||||
self.provider.topic, offset
|
||||
);
|
||||
}
|
||||
},
|
||||
}
|
||||
Err(err) => {
|
||||
error!(err; "Failed to get latest offset for topic {}", self.provider.topic);
|
||||
}
|
||||
|
||||
@@ -461,6 +461,7 @@ fn build_connection_options(tls_config: Option<&TlsOption>) -> Result<Option<Con
if matches!(tls_config.mode, TlsMode::Disable) {
return Ok(None);
}
info!("Creating etcd client with TLS mode: {:?}", tls_config.mode);
let mut etcd_tls_opts = TlsOptions::new();
// Set CA certificate if provided
if !tls_config.ca_cert_path.is_empty() {

@@ -152,13 +152,9 @@ fn align_ts(ts: i64, interval: Duration) -> i64 {
impl PersistStatsHandler {
/// Creates a new [`PersistStatsHandler`].
pub fn new(inserter: Box<dyn Inserter>, mut persist_interval: Duration) -> Self {
if persist_interval < Duration::from_secs(60) {
warn!("persist_interval is less than 60 seconds, set to 60 seconds");
persist_interval = Duration::from_secs(60);
}
if persist_interval.as_millis() == 0 {
warn!("persist_interval as milliseconds is zero, set to 60 second");
persist_interval = Duration::from_secs(60);
if persist_interval < Duration::from_mins(10) {
warn!("persist_interval is less than 10 minutes, set to 10 minutes");
persist_interval = Duration::from_mins(10);
}

Self {

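
// The constructor above now enforces a single 10-minute floor on the persist
// interval, using the nightly `Duration::from_mins` constructor (hence the
// `duration_constructors` feature added in the next hunk). A stable-Rust sketch
// of the same clamp using `from_secs`.
use std::time::Duration;

fn clamp_persist_interval(requested: Duration) -> Duration {
    const MIN_INTERVAL: Duration = Duration::from_secs(10 * 60);
    if requested < MIN_INTERVAL {
        eprintln!("persist_interval is less than 10 minutes, set to 10 minutes");
        MIN_INTERVAL
    } else {
        requested
    }
}

fn main() {
    assert_eq!(clamp_persist_interval(Duration::from_secs(60)), Duration::from_secs(600));
    assert_eq!(clamp_persist_interval(Duration::from_secs(3600)), Duration::from_secs(3600));
}
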
@@ -16,6 +16,7 @@
#![feature(assert_matches)]
#![feature(hash_set_entry)]
#![feature(let_chains)]
#![feature(duration_constructors_lite)]
#![feature(duration_constructors)]

pub mod bootstrap;

@@ -114,8 +114,8 @@ pub struct StatsPersistenceOptions {
impl Default for StatsPersistenceOptions {
fn default() -> Self {
Self {
ttl: Duration::from_days(30),
interval: Duration::from_secs(60),
ttl: Duration::ZERO,
interval: Duration::from_mins(10),
}
}
}

@@ -82,7 +82,7 @@ lazy_static! {
.unwrap();
/// The triggered region flush total counter.
pub static ref METRIC_META_TRIGGERED_REGION_FLUSH_TOTAL: IntCounterVec =
register_int_counter_vec!("meta_triggered_region_flush_total", "meta triggered region flush total", &["topic_name", "region_type"]).unwrap();
register_int_counter_vec!("meta_triggered_region_flush_total", "meta triggered region flush total", &["topic_name"]).unwrap();

/// The triggered region checkpoint total counter.
pub static ref METRIC_META_TRIGGERED_REGION_CHECKPOINT_TOTAL: IntCounterVec =
@@ -19,7 +19,6 @@ use api::v1::meta::MailboxMessage;
use common_meta::distributed_time_constants::REGION_LEASE_SECS;
use common_meta::instruction::{Instruction, InstructionReply, OpenRegion, SimpleReply};
use common_meta::key::datanode_table::RegionInfo;
use common_meta::wal_options_allocator::extract_topic_from_wal_options;
use common_meta::RegionIdent;
use common_procedure::{Context as ProcedureContext, Status};
use common_telemetry::info;
@@ -68,7 +67,6 @@ impl OpenCandidateRegion {
async fn build_open_region_instruction(&self, ctx: &mut Context) -> Result<Instruction> {
let pc = &ctx.persistent_ctx;
let table_id = pc.region_id.table_id();
let region_id = pc.region_id;
let region_number = pc.region_id.region_number();
let candidate_id = pc.to_peer.id;
let datanode_table_value = ctx.get_from_peer_datanode_table_value().await?;
@@ -80,31 +78,18 @@ impl OpenCandidateRegion {
engine,
} = datanode_table_value.region_info.clone();

let checkpoint =
if let Some(topic) = extract_topic_from_wal_options(region_id, &region_wal_options) {
ctx.fetch_replay_checkpoint(&topic).await.ok().flatten()
} else {
None
};

let open_instruction = Instruction::OpenRegion(
OpenRegion::new(
RegionIdent {
datanode_id: candidate_id,
table_id,
region_number,
engine,
},
&region_storage_path,
region_options,
region_wal_options,
true,
)
.with_replay_entry_id(checkpoint.map(|checkpoint| checkpoint.entry_id))
.with_metadata_replay_entry_id(
checkpoint.and_then(|checkpoint| checkpoint.metadata_entry_id),
),
);
let open_instruction = Instruction::OpenRegion(OpenRegion::new(
RegionIdent {
datanode_id: candidate_id,
table_id,
region_number,
engine,
},
&region_storage_path,
region_options,
region_wal_options,
true,
));

Ok(open_instruction)
}
@@ -241,8 +226,6 @@ mod tests {
region_options: Default::default(),
region_wal_options: Default::default(),
skip_wal_replay: true,
replay_entry_id: None,
metadata_replay_entry_id: None,
})
}

@@ -19,6 +19,7 @@ use api::v1::meta::MailboxMessage;
use common_meta::ddl::utils::parse_region_wal_options;
use common_meta::instruction::{Instruction, InstructionReply, UpgradeRegion, UpgradeRegionReply};
use common_meta::lock_key::RemoteWalLock;
use common_meta::wal_options_allocator::extract_topic_from_wal_options;
use common_procedure::{Context as ProcedureContext, Status};
use common_telemetry::{error, warn};
use common_wal::options::WalOptions;
@@ -111,23 +112,40 @@ impl UpgradeCandidateRegion {
}

/// Builds upgrade region instruction.
fn build_upgrade_region_instruction(
async fn build_upgrade_region_instruction(
&self,
ctx: &Context,
ctx: &mut Context,
replay_timeout: Duration,
) -> Instruction {
) -> Result<Instruction> {
let pc = &ctx.persistent_ctx;
let region_id = pc.region_id;
let last_entry_id = ctx.volatile_ctx.leader_region_last_entry_id;
let metadata_last_entry_id = ctx.volatile_ctx.leader_region_metadata_last_entry_id;
// Try our best to retrieve replay checkpoint.
let datanode_table_value = ctx.get_from_peer_datanode_table_value().await.ok();
let checkpoint = if let Some(topic) = datanode_table_value.as_ref().and_then(|v| {
extract_topic_from_wal_options(region_id, &v.region_info.region_wal_options)
}) {
ctx.fetch_replay_checkpoint(&topic).await.ok().flatten()
} else {
None
};

Instruction::UpgradeRegion(UpgradeRegion {
region_id,
last_entry_id,
metadata_last_entry_id,
replay_timeout: Some(replay_timeout),
location_id: Some(ctx.persistent_ctx.from_peer.id),
})
let upgrade_instruction = Instruction::UpgradeRegion(
UpgradeRegion {
region_id,
last_entry_id,
metadata_last_entry_id,
replay_timeout: Some(replay_timeout),
location_id: Some(ctx.persistent_ctx.from_peer.id),
replay_entry_id: None,
metadata_replay_entry_id: None,
}
.with_replay_entry_id(checkpoint.map(|c| c.entry_id))
.with_metadata_replay_entry_id(checkpoint.and_then(|c| c.metadata_entry_id)),
);

Ok(upgrade_instruction)
}

/// Tries to upgrade a candidate region.
@@ -144,16 +162,19 @@ impl UpgradeCandidateRegion {
/// - [UnexpectedInstructionReply](error::Error::UnexpectedInstructionReply) (impossible).
/// - [ExceededDeadline](error::Error::ExceededDeadline)
/// - Invalid JSON (impossible).
async fn upgrade_region(&self, ctx: &Context) -> Result<()> {
let pc = &ctx.persistent_ctx;
let region_id = pc.region_id;
let candidate = &pc.to_peer;
async fn upgrade_region(&self, ctx: &mut Context) -> Result<()> {
let operation_timeout =
ctx.next_operation_timeout()
.context(error::ExceededDeadlineSnafu {
operation: "Upgrade region",
})?;
let upgrade_instruction = self.build_upgrade_region_instruction(ctx, operation_timeout);
let upgrade_instruction = self
.build_upgrade_region_instruction(ctx, operation_timeout)
.await?;

let pc = &ctx.persistent_ctx;
let region_id = pc.region_id;
let candidate = &pc.to_peer;

let msg = MailboxMessage::json_message(
&format!("Upgrade candidate region: {}", region_id),
@@ -283,8 +304,12 @@ impl UpgradeCandidateRegion {
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use std::collections::HashMap;

use common_meta::key::table_route::TableRouteValue;
use common_meta::key::test_utils::new_test_table_info;
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute};
use store_api::storage::RegionId;

use super::*;
@@ -308,14 +333,33 @@ mod tests {
}
}

async fn prepare_table_metadata(ctx: &Context, wal_options: HashMap<u32, String>) {
let table_info =
new_test_table_info(ctx.persistent_ctx.region_id.table_id(), vec![1]).into();
let region_routes = vec![RegionRoute {
region: Region::new_test(ctx.persistent_ctx.region_id),
leader_peer: Some(ctx.persistent_ctx.from_peer.clone()),
follower_peers: vec![ctx.persistent_ctx.to_peer.clone()],
..Default::default()
}];
ctx.table_metadata_manager
.create_table_metadata(
table_info,
TableRouteValue::physical(region_routes),
wal_options,
)
.await
.unwrap();
}

#[tokio::test]
async fn test_datanode_is_unreachable() {
let state = UpgradeCandidateRegion::default();
let persistent_context = new_persistent_context();
let env = TestingEnv::new();
let ctx = env.context_factory().new_context(persistent_context);

let err = state.upgrade_region(&ctx).await.unwrap_err();
let mut ctx = env.context_factory().new_context(persistent_context);
prepare_table_metadata(&ctx, HashMap::default()).await;
let err = state.upgrade_region(&mut ctx).await.unwrap_err();

assert_matches!(err, Error::PusherNotFound { .. });
assert!(!err.is_retryable());
@@ -328,7 +372,8 @@ mod tests {
let to_peer_id = persistent_context.to_peer.id;

let mut env = TestingEnv::new();
let ctx = env.context_factory().new_context(persistent_context);
let mut ctx = env.context_factory().new_context(persistent_context);
prepare_table_metadata(&ctx, HashMap::default()).await;
let mailbox_ctx = env.mailbox_context();

let (tx, rx) = tokio::sync::mpsc::channel(1);
@@ -339,7 +384,7 @@ mod tests {

drop(rx);

let err = state.upgrade_region(&ctx).await.unwrap_err();
let err = state.upgrade_region(&mut ctx).await.unwrap_err();

assert_matches!(err, Error::PushMessage { .. });
assert!(!err.is_retryable());
@@ -351,10 +396,11 @@ mod tests {
let persistent_context = new_persistent_context();
let env = TestingEnv::new();
let mut ctx = env.context_factory().new_context(persistent_context);
prepare_table_metadata(&ctx, HashMap::default()).await;
ctx.volatile_ctx.metrics.operations_elapsed =
ctx.persistent_ctx.timeout + Duration::from_secs(1);

let err = state.upgrade_region(&ctx).await.unwrap_err();
let err = state.upgrade_region(&mut ctx).await.unwrap_err();

assert_matches!(err, Error::ExceededDeadline { .. });
assert!(!err.is_retryable());
@@ -367,7 +413,8 @@ mod tests {
let to_peer_id = persistent_context.to_peer.id;

let mut env = TestingEnv::new();
let ctx = env.context_factory().new_context(persistent_context);
let mut ctx = env.context_factory().new_context(persistent_context);
prepare_table_metadata(&ctx, HashMap::default()).await;
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();

@@ -379,7 +426,7 @@ mod tests {

send_mock_reply(mailbox, rx, |id| Ok(new_close_region_reply(id)));

let err = state.upgrade_region(&ctx).await.unwrap_err();
let err = state.upgrade_region(&mut ctx).await.unwrap_err();
assert_matches!(err, Error::UnexpectedInstructionReply { .. });
assert!(!err.is_retryable());
}
@@ -391,7 +438,8 @@ mod tests {
let to_peer_id = persistent_context.to_peer.id;

let mut env = TestingEnv::new();
let ctx = env.context_factory().new_context(persistent_context);
let mut ctx = env.context_factory().new_context(persistent_context);
prepare_table_metadata(&ctx, HashMap::default()).await;
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();

@@ -411,7 +459,7 @@ mod tests {
))
});

let err = state.upgrade_region(&ctx).await.unwrap_err();
let err = state.upgrade_region(&mut ctx).await.unwrap_err();

assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
@@ -425,7 +473,8 @@ mod tests {
let to_peer_id = persistent_context.to_peer.id;

let mut env = TestingEnv::new();
let ctx = env.context_factory().new_context(persistent_context);
let mut ctx = env.context_factory().new_context(persistent_context);
prepare_table_metadata(&ctx, HashMap::default()).await;
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();

@@ -439,7 +488,7 @@ mod tests {
Ok(new_upgrade_region_reply(id, true, false, None))
});

let err = state.upgrade_region(&ctx).await.unwrap_err();
let err = state.upgrade_region(&mut ctx).await.unwrap_err();

assert_matches!(err, Error::Unexpected { .. });
assert!(!err.is_retryable());
@@ -457,7 +506,8 @@ mod tests {
let to_peer_id = persistent_context.to_peer.id;

let mut env = TestingEnv::new();
let ctx = env.context_factory().new_context(persistent_context);
let mut ctx = env.context_factory().new_context(persistent_context);
prepare_table_metadata(&ctx, HashMap::default()).await;
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();

@@ -471,7 +521,7 @@ mod tests {
Ok(new_upgrade_region_reply(id, false, true, None))
});

let err = state.upgrade_region(&ctx).await.unwrap_err();
let err = state.upgrade_region(&mut ctx).await.unwrap_err();

assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
@@ -491,7 +541,7 @@ mod tests {
Ok(new_upgrade_region_reply(id, false, true, None))
});

state.upgrade_region(&ctx).await.unwrap();
state.upgrade_region(&mut ctx).await.unwrap();
}

#[tokio::test]
@@ -503,6 +553,7 @@ mod tests {

let mut env = TestingEnv::new();
let mut ctx = env.context_factory().new_context(persistent_context);
prepare_table_metadata(&ctx, HashMap::default()).await;
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();

@@ -563,6 +614,7 @@ mod tests {

let mut env = TestingEnv::new();
let mut ctx = env.context_factory().new_context(persistent_context);
prepare_table_metadata(&ctx, HashMap::default()).await;
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();

@@ -621,6 +673,7 @@ mod tests {

let mut env = TestingEnv::new();
let mut ctx = env.context_factory().new_context(persistent_context);
prepare_table_metadata(&ctx, HashMap::default()).await;
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();
ctx.volatile_ctx.metrics.operations_elapsed =
@@ -29,7 +29,6 @@ use common_time::util::current_time_millis;
use common_wal::config::kafka::common::{
DEFAULT_CHECKPOINT_TRIGGER_SIZE, DEFAULT_FLUSH_TRIGGER_SIZE,
};
use itertools::Itertools;
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionId;
use tokio::sync::mpsc::{Receiver, Sender};
@@ -223,31 +222,34 @@ impl RegionFlushTrigger {
&self,
topic: &str,
region_ids: &[RegionId],
topic_regions: &HashMap<RegionId, TopicRegionValue>,
leader_regions: &HashMap<RegionId, LeaderRegion>,
) -> Result<()> {
if region_ids.is_empty() {
return Ok(());
}

let regions = region_ids
.iter()
.flat_map(|region_id| match leader_regions.get(region_id) {
Some(leader_region) => {
let entry_id = leader_region.manifest.replay_entry_id();
let metadata_entry_id = leader_region.manifest.metadata_replay_entry_id();

Some((
Some(leader_region) => should_persist_region_checkpoint(
leader_region,
topic_regions
.get(region_id)
.cloned()
.and_then(|value| value.checkpoint),
)
.map(|checkpoint| {
(
TopicRegionKey::new(*region_id, topic),
Some(TopicRegionValue::new(Some(ReplayCheckpoint::new(
entry_id,
metadata_entry_id,
)))),
))
}
Some(TopicRegionValue::new(Some(checkpoint))),
)
}),
None => None,
})
.collect::<Vec<_>>();

// The `chunks` will panic if chunks_size is zero, so we return early if there are no regions to persist.
if regions.is_empty() {
return Ok(());
}

let max_txn_ops = self.table_metadata_manager.kv_backend().max_txn_ops();
let batch_size = max_txn_ops.min(regions.len());
for batch in regions.chunks(batch_size) {
@@ -271,14 +273,14 @@ impl RegionFlushTrigger {
latest_entry_id: u64,
avg_record_size: usize,
) -> Result<()> {
let region_ids = self
let topic_regions = self
.table_metadata_manager
.topic_region_manager()
.regions(topic)
.await
.context(error::TableMetadataManagerSnafu)?;

if region_ids.is_empty() {
if topic_regions.is_empty() {
debug!("No regions found for topic: {}", topic);
return Ok(());
}
@@ -286,7 +288,7 @@ impl RegionFlushTrigger {
// Filters regions need to persist checkpoints.
let regions_to_persist = filter_regions_by_replay_size(
topic,
region_ids
topic_regions
.iter()
.map(|(region_id, value)| (*region_id, value.min_entry_id().unwrap_or_default())),
avg_record_size as u64,
@@ -295,33 +297,25 @@ impl RegionFlushTrigger {
);
let region_manifests = self
.leader_region_registry
.batch_get(region_ids.keys().cloned());
.batch_get(topic_regions.keys().cloned());

if let Err(err) = self
.persist_region_checkpoints(topic, &regions_to_persist, &region_manifests)
.persist_region_checkpoints(
topic,
&regions_to_persist,
&topic_regions,
&region_manifests,
)
.await
{
error!(err; "Failed to persist region checkpoints for topic: {}", topic);
}

let (inactive_regions, active_regions): (Vec<_>, Vec<_>) = region_manifests
let regions = region_manifests
.into_iter()
.partition_map(|(region_id, region)| {
if !region.manifest.is_inactive() {
itertools::Either::Left((region_id, region.manifest.prunable_entry_id()))
} else {
itertools::Either::Right((region_id, region.manifest.prunable_entry_id()))
}
});

let min_entry_id = inactive_regions
.iter()
.min_by_key(|(_, entry_id)| *entry_id);
let min_entry_id = active_regions
.iter()
.min_by_key(|(_, entry_id)| *entry_id)
.or(min_entry_id);

.map(|(region_id, region)| (region_id, region.manifest.prunable_entry_id()))
.collect::<Vec<_>>();
let min_entry_id = regions.iter().min_by_key(|(_, entry_id)| *entry_id);
if let Some((_, min_entry_id)) = min_entry_id {
let replay_size = (latest_entry_id.saturating_sub(*min_entry_id))
.saturating_mul(avg_record_size as u64);
@@ -331,45 +325,28 @@ impl RegionFlushTrigger {
}

// Selects regions to flush from the set of active regions.
let mut regions_to_flush = filter_regions_by_replay_size(
let regions_to_flush = filter_regions_by_replay_size(
topic,
active_regions.into_iter(),
regions.into_iter(),
avg_record_size as u64,
latest_entry_id,
self.flush_trigger_size,
);

let active_regions_num = regions_to_flush.len();
// Selects regions to flush from the set of inactive regions.
// For inactive regions, we use a lower flush trigger size (half of the normal size)
// to encourage more aggressive flushing to update the region's topic latest entry id.
let inactive_regions_to_flush = filter_regions_by_replay_size(
topic,
inactive_regions.into_iter(),
avg_record_size as u64,
latest_entry_id,
self.flush_trigger_size / 2,
);
let inactive_regions_num = inactive_regions_to_flush.len();
regions_to_flush.extend(inactive_regions_to_flush);

// Sends flush instructions to datanodes.
if !regions_to_flush.is_empty() {
self.send_flush_instructions(&regions_to_flush).await?;
debug!(
"Sent {} flush instructions to datanodes for topic: '{}' ({} inactive regions)",
"Sent {} flush instructions to datanodes for topic: '{}', regions: {:?}",
regions_to_flush.len(),
topic,
inactive_regions_num,
regions_to_flush,
);
}

metrics::METRIC_META_TRIGGERED_REGION_FLUSH_TOTAL
.with_label_values(&[topic, "active"])
.inc_by(active_regions_num as u64);
metrics::METRIC_META_TRIGGERED_REGION_FLUSH_TOTAL
.with_label_values(&[topic, "inactive"])
.inc_by(inactive_regions_num as u64);
.with_label_values(&[topic])
.inc_by(regions_to_flush.len() as u64);

Ok(())
}
@@ -408,6 +385,26 @@ impl RegionFlushTrigger {
}
}

/// Determines whether a region checkpoint should be persisted based on current and persisted state.
fn should_persist_region_checkpoint(
current: &LeaderRegion,
persisted: Option<ReplayCheckpoint>,
) -> Option<ReplayCheckpoint> {
let new_checkpoint = ReplayCheckpoint::new(
current.manifest.replay_entry_id(),
current.manifest.metadata_replay_entry_id(),
);

let Some(persisted) = persisted else {
return Some(new_checkpoint);
};

if new_checkpoint > persisted {
return Some(new_checkpoint);
}
None
}

/// Filter regions based on the estimated replay size.
///
/// Returns the regions if its estimated replay size exceeds the given threshold.
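The `filter_regions_by_replay_size` calls above rely on an estimated replay size derived from the entry-id gap and the average record size. A standalone sketch of that estimate (the trigger value in `main` is illustrative, not a project default):

```rust
// Estimated bytes to replay = (topic latest entry id - region min entry id) * avg record size,
// using saturating arithmetic as in the code above.
fn estimated_replay_size(latest_entry_id: u64, min_entry_id: u64, avg_record_size: u64) -> u64 {
    latest_entry_id
        .saturating_sub(min_entry_id)
        .saturating_mul(avg_record_size)
}

fn main() {
    // A region lagging 10_000 entries with ~512-byte records needs roughly 5 MB of replay,
    // which would exceed a 4 MiB flush trigger and select the region for flushing.
    let size = estimated_replay_size(20_000, 10_000, 512);
    assert_eq!(size, 5_120_000);
    assert!(size > 4 * 1024 * 1024);
}
```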
@@ -496,6 +493,7 @@ fn is_recent(timestamp: i64, now: i64, duration: Duration) -> bool {
#[cfg(test)]
mod tests {
use common_base::readable_size::ReadableSize;
use common_meta::region_registry::LeaderRegionManifestInfo;
use store_api::storage::RegionId;

use super::*;
@@ -626,4 +624,92 @@ mod tests {
// Only regions 1,1 and 1,2 should be flushed
assert_eq!(result, vec![region_id(1, 1), region_id(1, 2)]);
}

fn metric_leader_region(replay_entry_id: u64, metadata_replay_entry_id: u64) -> LeaderRegion {
LeaderRegion {
datanode_id: 1,
manifest: LeaderRegionManifestInfo::Metric {
data_manifest_version: 1,
data_flushed_entry_id: replay_entry_id,
data_topic_latest_entry_id: 0,
metadata_manifest_version: 1,
metadata_flushed_entry_id: metadata_replay_entry_id,
metadata_topic_latest_entry_id: 0,
},
}
}

fn mito_leader_region(replay_entry_id: u64) -> LeaderRegion {
LeaderRegion {
datanode_id: 1,
manifest: LeaderRegionManifestInfo::Mito {
manifest_version: 1,
flushed_entry_id: replay_entry_id,
topic_latest_entry_id: 0,
},
}
}

#[test]
fn test_should_persist_region_checkpoint() {
// `persisted` is none
let current = metric_leader_region(100, 10);
let result = should_persist_region_checkpoint(&current, None).unwrap();
assert_eq!(result, ReplayCheckpoint::new(100, Some(10)));

// `persisted.entry_id` is less than `current.manifest.replay_entry_id()`
let current = mito_leader_region(100);
let result =
should_persist_region_checkpoint(&current, Some(ReplayCheckpoint::new(90, None)))
.unwrap();
assert_eq!(result, ReplayCheckpoint::new(100, None));

let current = metric_leader_region(100, 10);
let result =
should_persist_region_checkpoint(&current, Some(ReplayCheckpoint::new(90, Some(10))))
.unwrap();
assert_eq!(result, ReplayCheckpoint::new(100, Some(10)));

// `persisted.metadata_entry_id` is less than `current.manifest.metadata_replay_entry_id()`
let current = metric_leader_region(100, 10);
let result =
should_persist_region_checkpoint(&current, Some(ReplayCheckpoint::new(100, Some(8))))
.unwrap();
assert_eq!(result, ReplayCheckpoint::new(100, Some(10)));

// `persisted.metadata_entry_id` is none
let current = metric_leader_region(100, 10);
let result =
should_persist_region_checkpoint(&current, Some(ReplayCheckpoint::new(100, None)))
.unwrap();
assert_eq!(result, ReplayCheckpoint::new(100, Some(10)));

// `current.manifest.metadata_replay_entry_id()` is none
let current = mito_leader_region(100);
let result =
should_persist_region_checkpoint(&current, Some(ReplayCheckpoint::new(100, Some(8))))
.is_none();
assert!(result);

// `persisted.entry_id` is equal to `current.manifest.replay_entry_id()`
let current = metric_leader_region(100, 10);
let result =
should_persist_region_checkpoint(&current, Some(ReplayCheckpoint::new(100, Some(10))));
assert!(result.is_none());
let current = mito_leader_region(100);
let result =
should_persist_region_checkpoint(&current, Some(ReplayCheckpoint::new(100, None)));
assert!(result.is_none());

// `persisted.entry_id` is less than `current.manifest.replay_entry_id()`
// `persisted.metadata_entry_id` is greater than `current.manifest.metadata_replay_entry_id()`
let current = metric_leader_region(80, 11);
let result =
should_persist_region_checkpoint(&current, Some(ReplayCheckpoint::new(90, Some(10))));
assert!(result.is_none());
let current = mito_leader_region(80);
let result =
should_persist_region_checkpoint(&current, Some(ReplayCheckpoint::new(90, Some(10))));
assert!(result.is_none());
}
}
@@ -97,7 +97,7 @@ impl store_server::Store for Metasrv {
let req = req.into_inner();

let _timer = METRIC_META_KV_REQUEST_ELAPSED
.with_label_values(&[self.kv_backend().name(), "batch_pub"])
.with_label_values(&[self.kv_backend().name(), "batch_put"])
.start_timer();

let req: BatchPutRequest = req.into();
@@ -15,7 +15,9 @@
use common_telemetry::debug;
use snafu::{OptionExt, ResultExt};
use store_api::region_engine::RegionEngine;
use store_api::region_request::{AffectedRows, RegionCatchupRequest, RegionRequest};
use store_api::region_request::{
AffectedRows, RegionCatchupRequest, RegionRequest, ReplayCheckpoint,
};
use store_api::storage::RegionId;

use crate::engine::MetricEngineInner;
@@ -59,6 +61,10 @@ impl MetricEngineInner {
entry_id: req.metadata_entry_id,
metadata_entry_id: None,
location_id: req.location_id,
checkpoint: req.checkpoint.map(|c| ReplayCheckpoint {
entry_id: c.metadata_entry_id.unwrap_or_default(),
metadata_entry_id: None,
}),
}),
)
.await
@@ -73,6 +79,10 @@ impl MetricEngineInner {
entry_id: req.entry_id,
metadata_entry_id: None,
location_id: req.location_id,
checkpoint: req.checkpoint.map(|c| ReplayCheckpoint {
entry_id: c.entry_id,
metadata_entry_id: None,
}),
}),
)
.await
@@ -127,10 +127,10 @@ mod tests {
r#"
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000001/data/<file_id>.parquet", file_size: 3157, index_file_path: Some("test_metric_region/11_0000000001/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20) }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000002/data/<file_id>.parquet", file_size: 3157, index_file_path: Some("test_metric_region/11_0000000002/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10) }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000001/metadata/<file_id>.parquet", file_size: 3201, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8) }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000002/metadata/<file_id>.parquet", file_size: 3185, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4) }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000001/metadata/<file_id>.parquet", file_size: 3429, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8) }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000002/metadata/<file_id>.parquet", file_size: 3413, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4) }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test_metric_region/22_0000000042/data/<file_id>.parquet", file_size: 3157, index_file_path: Some("test_metric_region/22_0000000042/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10) }
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test_metric_region/22_0000000042/metadata/<file_id>.parquet", file_size: 3185, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4) }"#
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test_metric_region/22_0000000042/metadata/<file_id>.parquet", file_size: 3413, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4) }"#
);

// list from storage
@@ -19,7 +19,7 @@ common-recordbatch.workspace = true
common-telemetry.workspace = true
common-time.workspace = true
datatypes.workspace = true
memcomparable = "0.2"
memcomparable = { git = "https://github.com/v0y4g3r/memcomparable.git", rev = "a07122dc03556bbd88ad66234cbea7efd3b23efb" }
paste.workspace = true
serde.workspace = true
snafu.workspace = true
@@ -72,7 +72,7 @@ fn encode_sparse(c: &mut Criterion) {
.unwrap();
codec
.encode_raw_tag_value(
tags.iter().map(|(c, b)| (*c, b)),
tags.iter().map(|(c, b)| (*c, &b[..])),
&mut buffer_by_raw_encoding,
)
.unwrap();
@@ -15,7 +15,7 @@
use std::collections::{HashMap, HashSet};
use std::sync::Arc;

use bytes::{BufMut, Bytes};
use bytes::BufMut;
use common_recordbatch::filter::SimpleFilterEvaluator;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::{Value, ValueRef};
@@ -179,7 +179,7 @@ impl SparsePrimaryKeyCodec {

pub fn encode_raw_tag_value<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
where
I: Iterator<Item = (ColumnId, &'a Bytes)>,
I: Iterator<Item = (ColumnId, &'a [u8])>,
{
for (tag_column_id, tag_value) in row {
let value_len = tag_value.len();
@@ -568,11 +568,11 @@ mod tests {
.unwrap();
let tags: Vec<_> = tags
.into_iter()
.map(|(col_id, tag_value)| (col_id, Bytes::from_static(tag_value.as_bytes())))
.map(|(col_id, tag_value)| (col_id, tag_value.as_bytes()))
.collect();
codec
.encode_raw_tag_value(
tags.iter().map(|(c, b)| (*c, b)),
tags.iter().map(|(c, b)| (*c, *b)),
&mut buffer_by_raw_encoding,
)
.unwrap();
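The signature change above means `encode_raw_tag_value` now accepts plain byte slices instead of `bytes::Bytes`, so callers pass `&b[..]` (or any `&[u8]`). A small sketch of why the slice-based bound is more flexible; the helper below is hypothetical and only illustrates the iterator item type:

```rust
// Hypothetical helper: any (column id, &[u8]) iterator works, regardless of how
// the caller stores the tag values (Vec<u8>, Bytes, string bytes, ...).
fn total_tag_len<'a, I>(row: I) -> usize
where
    I: Iterator<Item = (u32, &'a [u8])>,
{
    row.map(|(_, value)| value.len()).sum()
}

fn main() {
    let tags: Vec<(u32, Vec<u8>)> = vec![(1, b"host-1".to_vec()), (2, b"dc-1".to_vec())];
    let len = total_tag_len(tags.iter().map(|(c, b)| (*c, &b[..])));
    assert_eq!(len, 10);
}
```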
@@ -50,7 +50,6 @@ index.workspace = true
itertools.workspace = true
lazy_static = "1.4"
log-store = { workspace = true }
memcomparable = "0.2"
mito-codec.workspace = true
moka = { workspace = true, features = ["sync", "future"] }
object-store.workspace = true
@@ -189,6 +189,11 @@ impl AccessLayer {
&self.puffin_manager_factory
}

/// Returns the intermediate manager.
pub fn intermediate_manager(&self) -> &IntermediateManager {
&self.intermediate_manager
}

/// Deletes a SST file (and its index file if it has one) with given file id.
pub(crate) async fn delete_sst(&self, file_meta: &FileMeta) -> Result<()> {
let path = location::sst_file_path(&self.table_dir, file_meta.file_id(), self.path_type);
@@ -1077,6 +1077,7 @@ mod tests {
let staging_manifest_ctx = {
let manager = RegionManifestManager::new(
version_control.current().version.metadata.clone(),
0,
RegionManifestOptions {
manifest_dir: "".to_string(),
object_store: env.access_layer.object_store().clone(),
@@ -175,6 +175,10 @@ impl FileGroup {
pub(crate) fn into_files(self) -> impl Iterator<Item = FileHandle> {
self.files.into_iter()
}

pub(crate) fn is_all_level_0(&self) -> bool {
self.files.iter().all(|f| f.level() == 0)
}
}

impl Ranged for FileGroup {
@@ -42,6 +42,25 @@ pub fn new_file_handle_with_sequence(
end_ts_millis: i64,
level: Level,
sequence: u64,
) -> FileHandle {
new_file_handle_with_size_and_sequence(
file_id,
start_ts_millis,
end_ts_millis,
level,
sequence,
0,
)
}

/// Test util to create file handles with custom size.
pub fn new_file_handle_with_size_and_sequence(
file_id: FileId,
start_ts_millis: i64,
end_ts_millis: i64,
level: Level,
sequence: u64,
file_size: u64,
) -> FileHandle {
let file_purger = new_noop_file_purger();
FileHandle::new(
@@ -53,7 +72,7 @@ pub fn new_file_handle_with_sequence(
Timestamp::new_millisecond(end_ts_millis),
),
level,
file_size: 0,
file_size,
available_indexes: Default::default(),
index_file_size: 0,
num_rows: 0,
@@ -64,11 +64,32 @@ impl TwcsPicker {
continue;
}
let mut files_to_merge: Vec<_> = files.files().cloned().collect();

// Filter out large files in append mode - they won't benefit from compaction
if self.append_mode {
if let Some(max_size) = self.max_output_file_size {
let (kept_files, ignored_files) = files_to_merge
.into_iter()
.partition(|fg| fg.size() <= max_size as usize && fg.is_all_level_0());
files_to_merge = kept_files;
info!(
"Skipped {} large files in append mode for region {}, window {}, max_size: {}",
ignored_files.len(),
region_id,
window,
max_size
);
}
}

let sorted_runs = find_sorted_runs(&mut files_to_merge);
let found_runs = sorted_runs.len();
// We only remove deletion markers if we found less than 2 runs and not in append mode.
// because after compaction there will be no overlapping files.
let filter_deleted = !files.overlapping && found_runs <= 2 && !self.append_mode;
if found_runs == 0 {
return output;
}

let inputs = if found_runs > 1 {
reduce_runs(sorted_runs)
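In append mode the picker above now skips file groups that already exceed `max_output_file_size` (or contain non-level-0 files) before computing sorted runs. A simplified sketch of that partition step, using a hypothetical stand-in for the real `FileGroup`:

```rust
// Hypothetical stand-in for illustration; the real FileGroup exposes size() and is_all_level_0().
struct FileGroup {
    size: usize,
    level: u8,
}

// Keep small level-0 groups for merging; large or higher-level groups gain nothing
// from being rewritten when the region is append-only.
fn split_for_append_mode(groups: Vec<FileGroup>, max_size: u64) -> (Vec<FileGroup>, Vec<FileGroup>) {
    groups
        .into_iter()
        .partition(|g| g.size <= max_size as usize && g.level == 0)
}

fn main() {
    let groups = vec![
        FileGroup { size: 500, level: 0 },
        FileGroup { size: 1500, level: 0 },
        FileGroup { size: 800, level: 0 },
    ];
    let (kept, ignored) = split_for_append_mode(groups, 1000);
    assert_eq!((kept.len(), ignored.len()), (2, 1));
}
```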
@@ -330,7 +351,9 @@ mod tests {
use std::collections::HashSet;

use super::*;
use crate::compaction::test_util::{new_file_handle, new_file_handle_with_sequence};
use crate::compaction::test_util::{
new_file_handle, new_file_handle_with_sequence, new_file_handle_with_size_and_sequence,
};
use crate::sst::file::{FileId, Level};

#[test]
@@ -766,5 +789,45 @@ mod tests {
.check();
}

#[test]
fn test_append_mode_filter_large_files() {
let file_ids = (0..4).map(|_| FileId::random()).collect::<Vec<_>>();
let max_output_file_size = 1000u64;

// Create files with different sizes
let small_file_1 = new_file_handle_with_size_and_sequence(file_ids[0], 0, 999, 0, 1, 500);
let large_file_1 = new_file_handle_with_size_and_sequence(file_ids[1], 0, 999, 0, 2, 1500);
let small_file_2 = new_file_handle_with_size_and_sequence(file_ids[2], 0, 999, 0, 3, 800);
let large_file_2 = new_file_handle_with_size_and_sequence(file_ids[3], 0, 999, 0, 4, 2000);

// Create file groups (each file is in its own group due to different sequences)
let mut files_to_merge = vec![
FileGroup::new_with_file(small_file_1),
FileGroup::new_with_file(large_file_1),
FileGroup::new_with_file(small_file_2),
FileGroup::new_with_file(large_file_2),
];

// Test filtering logic directly
let original_count = files_to_merge.len();

// Apply append mode filtering
files_to_merge.retain(|fg| fg.size() <= max_output_file_size as usize);

// Should have filtered out 2 large files, leaving 2 small files
assert_eq!(files_to_merge.len(), 2);
assert_eq!(original_count, 4);

// Verify the remaining files are the small ones
for fg in &files_to_merge {
assert!(
fg.size() <= max_output_file_size as usize,
"File size {} should be <= {}",
fg.size(),
max_output_file_size
);
}
}

// TODO(hl): TTL tester that checks if get_expired_ssts function works as expected.
}
@@ -31,7 +31,7 @@ const MULTIPART_UPLOAD_MINIMUM_SIZE: ReadableSize = ReadableSize::mb(5);
/// Default channel size for parallel scan task.
pub(crate) const DEFAULT_SCAN_CHANNEL_SIZE: usize = 32;
/// Default maximum number of SST files to scan concurrently.
pub(crate) const DEFAULT_MAX_CONCURRENT_SCAN_FILES: usize = 128;
pub(crate) const DEFAULT_MAX_CONCURRENT_SCAN_FILES: usize = 384;

// Use `1/GLOBAL_WRITE_BUFFER_SIZE_FACTOR` of OS memory as global write buffer size in default mode
const GLOBAL_WRITE_BUFFER_SIZE_FACTOR: u64 = 8;
@@ -120,7 +120,7 @@ pub struct MitoConfig {
pub sst_write_buffer_size: ReadableSize,
/// Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
pub parallel_scan_channel_size: usize,
/// Maximum number of SST files to scan concurrently (default 128).
/// Maximum number of SST files to scan concurrently (default 384).
pub max_concurrent_scan_files: usize,
/// Whether to allow stale entries read during replay.
pub allow_stale_entries: bool,

@@ -53,6 +53,8 @@ mod prune_test;
#[cfg(test)]
mod row_selector_test;
#[cfg(test)]
mod scan_corrupt;
#[cfg(test)]
mod scan_test;
#[cfg(test)]
mod set_role_state_test;
@@ -127,8 +127,7 @@ async fn test_catchup_with_last_entry_id(factory: Option<LogStoreFactory>) {
RegionRequest::Catchup(RegionCatchupRequest {
set_writable: false,
entry_id: last_entry_id,
metadata_entry_id: None,
location_id: None,
..Default::default()
}),
)
.await;
@@ -160,8 +159,7 @@ async fn test_catchup_with_last_entry_id(factory: Option<LogStoreFactory>) {
RegionRequest::Catchup(RegionCatchupRequest {
set_writable: true,
entry_id: last_entry_id,
metadata_entry_id: None,
location_id: None,
..Default::default()
}),
)
.await;
@@ -251,8 +249,7 @@ async fn test_catchup_with_incorrect_last_entry_id(factory: Option<LogStoreFacto
RegionRequest::Catchup(RegionCatchupRequest {
set_writable: false,
entry_id: incorrect_last_entry_id,
metadata_entry_id: None,
location_id: None,
..Default::default()
}),
)
.await
@@ -269,8 +266,7 @@ async fn test_catchup_with_incorrect_last_entry_id(factory: Option<LogStoreFacto
RegionRequest::Catchup(RegionCatchupRequest {
set_writable: false,
entry_id: incorrect_last_entry_id,
metadata_entry_id: None,
location_id: None,
..Default::default()
}),
)
.await;
@@ -340,9 +336,7 @@ async fn test_catchup_without_last_entry_id(factory: Option<LogStoreFactory>) {
region_id,
RegionRequest::Catchup(RegionCatchupRequest {
set_writable: false,
entry_id: None,
metadata_entry_id: None,
location_id: None,
..Default::default()
}),
)
.await;
@@ -372,9 +366,7 @@ async fn test_catchup_without_last_entry_id(factory: Option<LogStoreFactory>) {
region_id,
RegionRequest::Catchup(RegionCatchupRequest {
set_writable: true,
entry_id: None,
metadata_entry_id: None,
location_id: None,
..Default::default()
}),
)
.await;
@@ -465,9 +457,7 @@ async fn test_catchup_with_manifest_update(factory: Option<LogStoreFactory>) {
region_id,
RegionRequest::Catchup(RegionCatchupRequest {
set_writable: false,
entry_id: None,
metadata_entry_id: None,
location_id: None,
..Default::default()
}),
)
.await;
@@ -503,9 +493,7 @@ async fn test_catchup_with_manifest_update(factory: Option<LogStoreFactory>) {
region_id,
RegionRequest::Catchup(RegionCatchupRequest {
set_writable: true,
entry_id: None,
metadata_entry_id: None,
location_id: None,
..Default::default()
}),
)
.await;
@@ -652,9 +640,7 @@ async fn test_local_catchup(factory: Option<LogStoreFactory>) {
region_id,
RegionRequest::Catchup(RegionCatchupRequest {
set_writable: true,
entry_id: None,
metadata_entry_id: None,
location_id: None,
..Default::default()
}),
)
.await;
@@ -715,9 +701,7 @@ async fn test_catchup_not_exist() {
non_exist_region_id,
RegionRequest::Catchup(RegionCatchupRequest {
set_writable: true,
entry_id: None,
metadata_entry_id: None,
location_id: None,
..Default::default()
}),
)
.await
112 src/mito2/src/engine/scan_corrupt.rs (Normal file)
@@ -0,0 +1,112 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use api::v1::helper::row;
use api::v1::value::ValueData;
use api::v1::Rows;
use datatypes::value::Value;
use mito_codec::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodec};
use parquet::file::statistics::Statistics;
use store_api::region_engine::RegionEngine;
use store_api::region_request::{PathType, RegionRequest};
use store_api::storage::consts::PRIMARY_KEY_COLUMN_NAME;
use store_api::storage::RegionId;

use crate::config::MitoConfig;
use crate::sst::parquet::reader::ParquetReaderBuilder;
use crate::test_util;
use crate::test_util::{CreateRequestBuilder, TestEnv};

#[tokio::test]
async fn test_scan_corrupt() {
common_telemetry::init_default_ut_logging();
let mut env = TestEnv::with_prefix("test_write_stats_with_long_string_value").await;
let engine = env.create_engine(MitoConfig::default()).await;

let region_id = RegionId::new(1, 1);
let request = CreateRequestBuilder::new().build();
let table_dir = request.table_dir.clone();
let column_schemas = test_util::rows_schema(&request);

engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();

let build_rows = |start: i32, end: i32| {
(start..end)
.map(|i| {
row(vec![
ValueData::StringValue(i.to_string().repeat(128)),
ValueData::F64Value(i as f64),
ValueData::TimestampMillisecondValue(i as i64 * 1000),
])
})
.collect()
};
let put_rows = async |start, end| {
let rows = Rows {
schema: column_schemas.clone(),
rows: build_rows(start, end),
};
test_util::put_rows(&engine, region_id, rows).await;
test_util::flush_region(&engine, region_id, None).await;
};
put_rows(0, 3).await;

let region = engine.get_region(region_id).unwrap();

let version = region.version();
let file = version
.ssts
.levels()
.iter()
.flat_map(|l| l.files.values())
.next()
.unwrap();

let object_store = env.get_object_store().unwrap();
let reader = ParquetReaderBuilder::new(
table_dir.clone(),
PathType::Bare,
file.clone(),
object_store.clone(),
)
.build()
.await
.unwrap();

let codec = DensePrimaryKeyCodec::new(&version.metadata);
for r in reader.parquet_metadata().row_groups() {
for c in r.columns() {
if c.column_descr().name() == PRIMARY_KEY_COLUMN_NAME {
let stats = c.statistics().unwrap();
let Statistics::ByteArray(b) = stats else {
unreachable!()
};
let min = codec
.decode_leftmost(b.min_bytes_opt().unwrap())
.unwrap()
.unwrap();
assert_eq!(Value::String("0".repeat(128).into()), min);

let max = codec
.decode_leftmost(b.max_bytes_opt().unwrap())
.unwrap()
.unwrap();
assert_eq!(Value::String("2".repeat(128).into()), max);
}
}
}
}
@@ -27,8 +27,8 @@ use crate::error::{
self, InstallManifestToSnafu, NoCheckpointSnafu, NoManifestsSnafu, RegionStoppedSnafu, Result,
};
use crate::manifest::action::{
RegionChange, RegionCheckpoint, RegionManifest, RegionManifestBuilder, RegionMetaAction,
RegionMetaActionList,
RegionChange, RegionCheckpoint, RegionEdit, RegionManifest, RegionManifestBuilder,
RegionMetaAction, RegionMetaActionList,
};
use crate::manifest::checkpointer::Checkpointer;
use crate::manifest::storage::{
@@ -150,6 +150,7 @@ impl RegionManifestManager {
/// Constructs a region's manifest and persist it.
pub async fn new(
metadata: RegionMetadataRef,
flushed_entry_id: u64,
options: RegionManifestOptions,
total_manifest_size: Arc<AtomicU64>,
manifest_version: Arc<AtomicU64>,
@@ -163,8 +164,8 @@ impl RegionManifestManager {
);

info!(
"Creating region manifest in {} with metadata {:?}",
options.manifest_dir, metadata
"Creating region manifest in {} with metadata {:?}, flushed_entry_id: {}",
options.manifest_dir, metadata, flushed_entry_id
);

let version = MIN_VERSION;
@@ -184,9 +185,21 @@ impl RegionManifestManager {
options.manifest_dir, manifest
);

let mut actions = vec![RegionMetaAction::Change(RegionChange { metadata })];
if flushed_entry_id > 0 {
actions.push(RegionMetaAction::Edit(RegionEdit {
files_to_add: vec![],
files_to_remove: vec![],
timestamp_ms: None,
compaction_time_window: None,
flushed_entry_id: Some(flushed_entry_id),
flushed_sequence: None,
}));
}

// Persist region change.
let action_list =
RegionMetaActionList::with_action(RegionMetaAction::Change(RegionChange { metadata }));
let action_list = RegionMetaActionList::new(actions);

// New region is not in staging mode.
// TODO(ruihang): add staging mode support if needed.
store.save(version, &action_list.encode()?, false).await?;
@@ -554,6 +554,8 @@ impl BulkPartEncoder {
WriterProperties::builder()
.set_write_batch_size(row_group_size)
.set_max_row_group_size(row_group_size)
.set_column_index_truncate_length(None)
.set_statistics_truncate_length(None)
.build(),
);
Self {

@@ -774,7 +774,9 @@ impl<'a> DataPartEncoder<'a> {
.set_column_encoding(sequence_col.clone(), Encoding::DELTA_BINARY_PACKED)
.set_column_dictionary_enabled(sequence_col, false)
.set_column_encoding(op_type_col.clone(), Encoding::DELTA_BINARY_PACKED)
.set_column_dictionary_enabled(op_type_col, true);
.set_column_dictionary_enabled(op_type_col, true)
.set_column_index_truncate_length(None)
.set_statistics_truncate_length(None);
builder.build()
}

@@ -48,7 +48,7 @@ use crate::read::stream::{ConvertBatchStream, ScanBatch, ScanBatchStream};
use crate::read::{Batch, ScannerMetrics};

/// Timeout to send a batch to a sender.
const SEND_TIMEOUT: Duration = Duration::from_millis(10);
const SEND_TIMEOUT: Duration = Duration::from_micros(100);

/// List of receivers.
type ReceiverList = Vec<Option<Receiver<Result<SeriesBatch>>>>;

@@ -1122,6 +1122,7 @@ mod tests {
let staging_ctx = {
let manager = RegionManifestManager::new(
version_control.current().version.metadata.clone(),
0,
RegionManifestOptions {
manifest_dir: "".to_string(),
object_store: env.access_layer.object_store().clone(),
@@ -1187,6 +1188,7 @@ mod tests {

let manager = RegionManifestManager::new(
metadata.clone(),
0,
RegionManifestOptions {
manifest_dir: "".to_string(),
object_store: access_layer.object_store().clone(),
@@ -238,8 +238,11 @@ impl RegionOpener {
// Create a manifest manager for this region and writes regions to the manifest file.
let region_manifest_options =
Self::manifest_options(config, &options, &region_dir, &self.object_store_manager)?;
// For remote WAL, we need to set flushed_entry_id to current topic's latest entry id.
let flushed_entry_id = provider.initial_flushed_entry_id::<S>(wal.store());
let manifest_manager = RegionManifestManager::new(
metadata.clone(),
flushed_entry_id,
region_manifest_options,
self.stats.total_manifest_size.clone(),
self.stats.manifest_version.clone(),
@@ -439,7 +442,7 @@ impl RegionOpener {
.build();
let flushed_entry_id = version.flushed_entry_id;
let version_control = Arc::new(VersionControl::new(version));
if !self.skip_wal_replay {
let topic_latest_entry_id = if !self.skip_wal_replay {
let replay_from_entry_id = self
.replay_checkpoint
.unwrap_or_default()
@@ -461,14 +464,26 @@ impl RegionOpener {
on_region_opened,
)
.await?;
// For remote WAL, we need to set topic_latest_entry_id to current topic's latest entry id.
// Only set after the WAL replay is completed.
let topic_latest_entry_id = if provider.is_remote_wal()
&& version_control.current().version.memtables.is_empty()
{
wal.store().latest_entry_id(&provider).unwrap_or(0)
} else {
0
};

topic_latest_entry_id
} else {
info!(
"Skip the WAL replay for region: {}, manifest version: {}, flushed_entry_id: {}",
region_id, manifest.manifest_version, flushed_entry_id
);
}
let now = self.time_provider.current_time_millis();

0
};
let now = self.time_provider.current_time_millis();
let region = MitoRegion {
region_id: self.region_id,
version_control,
@@ -483,7 +498,7 @@ impl RegionOpener {
last_flush_millis: AtomicI64::new(now),
last_compaction_millis: AtomicI64::new(now),
time_provider: self.time_provider.clone(),
topic_latest_entry_id: AtomicU64::new(0),
topic_latest_entry_id: AtomicU64::new(topic_latest_entry_id),
write_bytes: Arc::new(AtomicU64::new(0)),
memtable_builder,
stats: self.stats.clone(),
@@ -713,8 +728,8 @@ where

let series_count = version_control.current().series_count();
info!(
"Replay WAL for region: {}, rows recovered: {}, last entry id: {}, total timeseries replayed: {}, elapsed: {:?}",
region_id, rows_replayed, last_entry_id, series_count, now.elapsed()
"Replay WAL for region: {}, provider: {:?}, rows recovered: {}, replay from entry id: {}, last entry id: {}, total timeseries replayed: {}, elapsed: {:?}",
region_id, provider, rows_replayed, replay_from_entry_id, last_entry_id, series_count, now.elapsed()
);
Ok(last_entry_id)
}
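The RegionOpener change above only records the topic's latest entry id for a remote (Kafka) WAL, and only once replay has finished with empty memtables; otherwise it falls back to 0. A condensed sketch of that decision (function name hypothetical, the conditions mirror the diff):

```rust
// Returns the value stored into topic_latest_entry_id after opening a region.
fn topic_latest_entry_id_after_open(
    is_remote_wal: bool,
    memtables_empty: bool,
    store_latest_entry_id: Option<u64>,
) -> u64 {
    if is_remote_wal && memtables_empty {
        store_latest_entry_id.unwrap_or(0)
    } else {
        0
    }
}

fn main() {
    assert_eq!(topic_latest_entry_id_after_open(true, true, Some(42)), 42);
    // Local WAL, or replayed-but-unflushed data in the memtables: keep 0.
    assert_eq!(topic_latest_entry_id_after_open(false, true, Some(42)), 0);
    assert_eq!(topic_latest_entry_id_after_open(true, false, Some(42)), 0);
}
```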
@@ -371,7 +371,7 @@ impl VersionBuilder {
self
}

/// Sets truncated entty id.
/// Sets truncated entry id.
pub(crate) fn truncated_entry_id(mut self, entry_id: Option<EntryId>) -> Self {
self.truncated_entry_id = entry_id;
self
@@ -319,6 +319,10 @@ impl FileHandle {
pub fn num_rows(&self) -> usize {
self.inner.meta.num_rows as usize
}

pub fn level(&self) -> Level {
self.inner.meta.level
}
}

/// Inner data of [FileHandle].
@@ -137,6 +137,14 @@ impl FilePurger for LocalFilePurger {
error!(e; "Failed to purge stager with index file, file_id: {}, region: {}",
file_meta.file_id(), file_meta.region_id);
}
let file_id = file_meta.file_id();
if let Err(e) = sst_layer
.intermediate_manager()
.prune_sst_dir(&file_id.region_id(), &file_id.file_id())
.await
{
error!(e; "Failed to prune intermediate sst directory, region_id: {}, file_id: {}", file_id.region_id(), file_id.file_id());
}
})) {
error!(e; "Failed to schedule the file purge request");
}
@@ -110,6 +110,7 @@ pub struct Indexer {
last_mem_fulltext_index: usize,
bloom_filter_indexer: Option<BloomFilterIndexer>,
last_mem_bloom_filter: usize,
intermediate_manager: Option<IntermediateManager>,
}

impl Indexer {
@@ -196,6 +197,7 @@ impl IndexerBuilder for IndexerBuilderImpl {
indexer.inverted_indexer = self.build_inverted_indexer(file_id);
indexer.fulltext_indexer = self.build_fulltext_indexer(file_id).await;
indexer.bloom_filter_indexer = self.build_bloom_filter_indexer(file_id);
indexer.intermediate_manager = Some(self.intermediate_manager.clone());
if indexer.inverted_indexer.is_none()
&& indexer.fulltext_indexer.is_none()
&& indexer.bloom_filter_indexer.is_none()
@@ -21,6 +21,7 @@ impl Indexer {
|
||||
self.do_abort_inverted_index().await;
|
||||
self.do_abort_fulltext_index().await;
|
||||
self.do_abort_bloom_filter().await;
|
||||
self.do_prune_intm_sst_dir().await;
|
||||
self.puffin_manager = None;
|
||||
}
|
||||
|
||||
|
||||
@@ -54,6 +54,7 @@ impl Indexer {
|
||||
return IndexOutput::default();
|
||||
}
|
||||
|
||||
self.do_prune_intm_sst_dir().await;
|
||||
output.file_size = self.do_finish_puffin_writer(writer).await;
|
||||
output
|
||||
}
|
||||
@@ -270,4 +271,12 @@ impl Indexer {
|
||||
output.row_count = row_count;
|
||||
output.columns = column_ids;
|
||||
}
|
||||
|
||||
pub(crate) async fn do_prune_intm_sst_dir(&mut self) {
|
||||
if let Some(manager) = self.intermediate_manager.take() {
|
||||
if let Err(e) = manager.prune_sst_dir(&self.region_id, &self.file_id).await {
|
||||
warn!(e; "Failed to prune intermediate SST directory, region_id: {}, file_id: {}", self.region_id, self.file_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::io::ErrorKind;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use async_trait::async_trait;
|
||||
@@ -54,14 +55,28 @@ impl IntermediateManager {
|
||||
aux_path.as_ref()
|
||||
);
|
||||
|
||||
// Remove the intermediate directory in the background
|
||||
let aux_pb = PathBuf::from(aux_path.as_ref());
|
||||
let intm_dir = aux_pb.join(INTERMEDIATE_DIR);
|
||||
let deleted_dir = intm_dir.with_extension(format!("deleted-{}", Uuid::new_v4()));
|
||||
match tokio::fs::rename(&intm_dir, &deleted_dir).await {
|
||||
Ok(_) => {
|
||||
tokio::spawn(async move {
|
||||
if let Err(err) = tokio::fs::remove_dir_all(deleted_dir).await {
|
||||
warn!(err; "Failed to remove intermediate directory");
|
||||
}
|
||||
});
|
||||
}
|
||||
Err(err) => {
|
||||
if err.kind() != ErrorKind::NotFound {
|
||||
warn!(err; "Failed to rename intermediate directory");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let store = new_fs_cache_store(&normalize_dir(aux_path.as_ref())).await?;
|
||||
let store = InstrumentedStore::new(store);
|
||||
|
||||
// Remove all garbage intermediate files from previous runs.
|
||||
if let Err(err) = store.remove_all(INTERMEDIATE_DIR).await {
|
||||
warn!(err; "Failed to remove garbage intermediate files");
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
base_dir: PathBuf::from(aux_path.as_ref()),
|
||||
store,
|
||||
@@ -94,6 +109,24 @@ impl IntermediateManager {
|
||||
.join(sst_file_id.to_string())
|
||||
.join(format!("fulltext-{column_id}-{uuid}"))
|
||||
}
|
||||
/// Prunes the intermediate directory for SST files.
pub(crate) async fn prune_sst_dir(
&self,
region_id: &RegionId,
sst_file_id: &FileId,
) -> Result<()> {
let region_id = region_id.as_u64();
let sst_dir = format!("{INTERMEDIATE_DIR}/{region_id}/{sst_file_id}/");
self.store.remove_all(&sst_dir).await
}

/// Prunes the intermediate directory for region files.
pub(crate) async fn prune_region_dir(&self, region_id: &RegionId) -> Result<()> {
let region_id = region_id.as_u64();
let region_dir = format!("{INTERMEDIATE_DIR}/{region_id}/");
self.store.remove_all(&region_dir).await
}
}
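The two prune helpers above simply delete subtrees of the per-region intermediate directory: prune_sst_dir targets one SST's scratch files, while prune_region_dir drops the whole region subtree (which is why the drop path below calls it). A minimal illustrative sketch of the paths involved, not part of the commit; the directory constant value and the plain-string ids are assumptions for the sketch:

// Sketch only: mirrors the path layout that `prune_sst_dir` and `prune_region_dir` remove.
// The directory name below is an assumed placeholder, not the real constant.
const INTERMEDIATE_DIR: &str = "intermediate";

fn sst_intm_dir(region_id: u64, sst_file_id: &str) -> String {
    // Removed by `prune_sst_dir`: scratch files of a single SST.
    format!("{INTERMEDIATE_DIR}/{region_id}/{sst_file_id}/")
}

fn region_intm_dir(region_id: u64) -> String {
    // Removed by `prune_region_dir`: the whole per-region subtree.
    format!("{INTERMEDIATE_DIR}/{region_id}/")
}

fn main() {
    assert_eq!(sst_intm_dir(1, "sst-a"), "intermediate/1/sst-a/");
    assert_eq!(region_intm_dir(1), "intermediate/1/");
}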
/// `IntermediateLocation` produces paths for intermediate files
|
||||
@@ -268,6 +301,60 @@ mod tests {
|
||||
.unwrap());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cleanup_dir() {
|
||||
let temp_dir = temp_dir::create_temp_dir("test_cleanup_dir_");
|
||||
|
||||
let region_id = RegionId::new(0, 0);
|
||||
let sst_file_id = FileId::random();
|
||||
let region_dir = temp_dir
|
||||
.path()
|
||||
.join(INTERMEDIATE_DIR)
|
||||
.join(region_id.as_u64().to_string());
|
||||
let sst_dir = region_dir.join(sst_file_id.to_string());
|
||||
|
||||
let path = temp_dir.path().to_str().unwrap();
|
||||
let manager = IntermediateManager::init_fs(path).await.unwrap();
|
||||
|
||||
let location = IntermediateLocation::new(&region_id, &sst_file_id);
|
||||
let temp_file_provider = TempFileProvider::new(location, manager.clone());
|
||||
|
||||
let mut f1 = temp_file_provider
|
||||
.create("sky", "000000000000")
|
||||
.await
|
||||
.unwrap();
|
||||
f1.write_all(b"hello").await.unwrap();
|
||||
f1.flush().await.unwrap();
|
||||
f1.close().await.unwrap();
|
||||
|
||||
let mut f2 = temp_file_provider
|
||||
.create("sky", "000000000001")
|
||||
.await
|
||||
.unwrap();
|
||||
f2.write_all(b"world").await.unwrap();
|
||||
f2.flush().await.unwrap();
|
||||
f2.close().await.unwrap();
|
||||
|
||||
temp_file_provider.cleanup().await.unwrap();
|
||||
|
||||
// sst_dir and region_dir still exist
assert!(tokio::fs::try_exists(&sst_dir).await.unwrap());
assert!(tokio::fs::try_exists(&region_dir).await.unwrap());

// sst_dir should be deleted, region_dir still exists
manager
.prune_sst_dir(&region_id, &sst_file_id)
.await
.unwrap();
assert!(tokio::fs::try_exists(&region_dir).await.unwrap());
assert!(!tokio::fs::try_exists(&sst_dir).await.unwrap());

// sst_dir, region_dir should be deleted
manager.prune_region_dir(&region_id).await.unwrap();
assert!(!tokio::fs::try_exists(&sst_dir).await.unwrap());
assert!(!tokio::fs::try_exists(&region_dir).await.unwrap());
}
|
||||
|
||||
#[test]
|
||||
fn test_intermediate_location() {
|
||||
let sst_file_id = FileId::random();
|
||||
|
||||
@@ -329,7 +329,9 @@ where
.set_key_value_metadata(Some(vec![key_value_meta]))
.set_compression(Compression::ZSTD(ZstdLevel::default()))
.set_encoding(Encoding::PLAIN)
.set_max_row_group_size(opts.row_group_size);
.set_max_row_group_size(opts.row_group_size)
.set_column_index_truncate_length(None)
.set_statistics_truncate_length(None);

let props_builder = Self::customize_column_config(props_builder, &self.metadata);
let writer_props = props_builder.build();
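For context on the two builder calls added above: in the parquet crate, passing `None` to these setters is meant to lift the default truncation limit so full column-index and statistics values are written. A small self-contained sketch under that reading (the function and parameter names are illustrative, not from the commit):

use parquet::basic::{Compression, Encoding, ZstdLevel};
use parquet::file::properties::WriterProperties;

// Sketch only: builds writer properties the same way as the code above,
// with column-index and statistics truncation disabled via `None`.
fn example_writer_props(row_group_size: usize) -> WriterProperties {
    WriterProperties::builder()
        .set_compression(Compression::ZSTD(ZstdLevel::default()))
        .set_encoding(Encoding::PLAIN)
        .set_max_row_group_size(row_group_size)
        .set_column_index_truncate_length(None)
        .set_statistics_truncate_length(None)
        .build()
}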
@@ -563,6 +563,7 @@ impl TestEnv {
|
||||
if let Some(metadata) = initial_metadata {
|
||||
RegionManifestManager::new(
|
||||
metadata,
|
||||
0,
|
||||
manifest_opts,
|
||||
Default::default(),
|
||||
Default::default(),
|
||||
|
||||
@@ -116,6 +116,7 @@ impl SchedulerEnv {
|
||||
Arc::new(ManifestContext::new(
|
||||
RegionManifestManager::new(
|
||||
metadata,
|
||||
0,
|
||||
RegionManifestOptions {
|
||||
manifest_dir: "".to_string(),
|
||||
object_store: self.access_layer.object_store().clone(),
|
||||
|
||||
@@ -65,7 +65,12 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
|
||||
if region.provider.is_remote_wal() {
|
||||
let flushed_entry_id = region.version_control.current().last_entry_id;
|
||||
info!("Trying to replay memtable for region: {region_id}, flushed entry id: {flushed_entry_id}");
|
||||
let replay_from_entry_id = request
|
||||
.checkpoint
|
||||
.map(|c| c.entry_id)
|
||||
.unwrap_or_default()
|
||||
.max(flushed_entry_id);
|
||||
info!("Trying to replay memtable for region: {region_id}, provider: {:?}, replay from entry id: {replay_from_entry_id}, flushed entry id: {flushed_entry_id}", region.provider);
|
||||
let timer = Instant::now();
|
||||
let wal_entry_reader =
|
||||
self.wal
|
||||
@@ -75,15 +80,16 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
&region.provider,
|
||||
wal_entry_reader,
|
||||
region_id,
|
||||
flushed_entry_id,
|
||||
replay_from_entry_id,
|
||||
&region.version_control,
|
||||
self.config.allow_stale_entries,
|
||||
on_region_opened,
|
||||
)
|
||||
.await?;
|
||||
info!(
|
||||
"Elapsed: {:?}, region: {region_id} catchup finished. last entry id: {last_entry_id}, expected: {:?}.",
|
||||
"Elapsed: {:?}, region: {region_id}, provider: {:?} catchup finished. replay from entry id: {replay_from_entry_id}, flushed entry id: {flushed_entry_id}, last entry id: {last_entry_id}, expected: {:?}.",
|
||||
timer.elapsed(),
|
||||
region.provider,
|
||||
request.entry_id
|
||||
);
|
||||
if let Some(expected_last_entry_id) = request.entry_id {
|
||||
|
||||
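The catch-up path above picks its replay start point as the larger of the request checkpoint's entry id and the region's flushed entry id, so a missing or stale checkpoint can never rewind replay behind already-flushed data. A tiny sketch of just that computation (the free function and plain u64 ids are illustrative, not from the codebase):

// Sketch only: mirrors `request.checkpoint.map(|c| c.entry_id).unwrap_or_default().max(flushed_entry_id)`.
fn replay_start(checkpoint_entry_id: Option<u64>, flushed_entry_id: u64) -> u64 {
    // No checkpoint falls back to 0, then clamps up to the flushed entry id.
    checkpoint_entry_id.unwrap_or_default().max(flushed_entry_id)
}

fn main() {
    assert_eq!(replay_start(None, 42), 42);       // no checkpoint: start at the flushed id
    assert_eq!(replay_start(Some(100), 42), 100); // checkpoint ahead of the flushed id
    assert_eq!(replay_start(Some(7), 42), 42);    // stale checkpoint is ignored
}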
@@ -99,6 +99,7 @@ where
|
||||
let object_store = region.access_layer.object_store().clone();
|
||||
let dropping_regions = self.dropping_regions.clone();
|
||||
let listener = self.listener.clone();
|
||||
let intm_manager = self.intermediate_manager.clone();
|
||||
common_runtime::spawn_global(async move {
|
||||
let gc_duration = listener
|
||||
.on_later_drop_begin(region_id)
|
||||
@@ -111,6 +112,9 @@ where
|
||||
gc_duration,
|
||||
)
|
||||
.await;
|
||||
if let Err(err) = intm_manager.prune_region_dir(&region_id).await {
|
||||
warn!(err; "Failed to prune intermediate region directory, region_id: {}", region_id);
|
||||
}
|
||||
listener.on_later_drop_end(region_id, removed);
|
||||
});
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ use common_error::define_into_tonic_status;
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_macro::stack_trace_debug;
|
||||
use common_query::error::Error as QueryResult;
|
||||
use datafusion::parquet;
|
||||
use datafusion_common::DataFusionError;
|
||||
use datatypes::arrow::error::ArrowError;
|
||||
@@ -36,6 +37,14 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to cast result: `{}`", source))]
|
||||
Cast {
|
||||
#[snafu(source)]
|
||||
source: QueryResult,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("View already exists: `{name}`"))]
|
||||
ViewAlreadyExists {
|
||||
name: String,
|
||||
@@ -870,6 +879,7 @@ pub type Result<T> = std::result::Result<T, Error>;
|
||||
impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
Error::Cast { source, .. } => source.status_code(),
|
||||
Error::InvalidSql { .. }
|
||||
| Error::InvalidConfigValue { .. }
|
||||
| Error::InvalidInsertRequest { .. }
|
||||
|
||||
@@ -32,7 +32,7 @@ use snafu::{ensure, OptionExt, ResultExt};
|
||||
use sql::ast::{Expr, FunctionArg, FunctionArgExpr, FunctionArguments, Value as SqlValue};
|
||||
use sql::statements::admin::Admin;
|
||||
|
||||
use crate::error::{self, ExecuteAdminFunctionSnafu, IntoVectorsSnafu, Result};
|
||||
use crate::error::{self, CastSnafu, ExecuteAdminFunctionSnafu, Result};
|
||||
use crate::statement::StatementExecutor;
|
||||
|
||||
const DUMMY_COLUMN: &str = "<dummy>";
|
||||
@@ -118,7 +118,7 @@ impl StatementExecutor {
|
||||
.collect(),
|
||||
return_field: Arc::new(arrow::datatypes::Field::new("result", ret_type, true)),
|
||||
number_rows: if args.is_empty() { 1 } else { args[0].len() },
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
config_options: Arc::new(query_ctx.create_config_options()),
|
||||
};
|
||||
|
||||
// Execute the async UDF
|
||||
@@ -134,22 +134,11 @@ impl StatementExecutor {
|
||||
})?;
|
||||
|
||||
// Convert result back to VectorRef
|
||||
let result = match result_columnar {
|
||||
datafusion_expr::ColumnarValue::Array(array) => {
|
||||
datatypes::vectors::Helper::try_into_vector(array).context(IntoVectorsSnafu)?
|
||||
}
|
||||
datafusion_expr::ColumnarValue::Scalar(scalar) => {
|
||||
let array =
|
||||
scalar
|
||||
.to_array_of_size(1)
|
||||
.with_context(|_| ExecuteAdminFunctionSnafu {
|
||||
msg: format!("Failed to convert scalar to array for {}", fn_name),
|
||||
})?;
|
||||
datatypes::vectors::Helper::try_into_vector(array).context(IntoVectorsSnafu)?
|
||||
}
|
||||
};
|
||||
let result_columnar: common_query::prelude::ColumnarValue =
|
||||
(&result_columnar).try_into().context(CastSnafu)?;
|
||||
|
||||
let result_vector: VectorRef = result_columnar.try_into_vector(1).context(CastSnafu)?;
|
||||
|
||||
let result_vector: VectorRef = result;
|
||||
let column_schemas = vec![ColumnSchema::new(
|
||||
// Use statement as the result column name
|
||||
stmt.to_string(),
|
||||
|
||||
@@ -369,6 +369,9 @@ impl<H> BoundedStager<H> {
|
||||
/// Note: It can't recover the mapping between puffin files and keys, so TTL
|
||||
/// is configured to purge the dangling files and directories.
|
||||
async fn recover(&self) -> Result<()> {
|
||||
let timer = std::time::Instant::now();
|
||||
info!("Recovering the staging area, base_dir: {:?}", self.base_dir);
|
||||
|
||||
let mut read_dir = fs::read_dir(&self.base_dir).await.context(ReadSnafu)?;
|
||||
|
||||
let mut elems = HashMap::new();
|
||||
@@ -430,6 +433,7 @@ impl<H> BoundedStager<H> {
|
||||
}
|
||||
|
||||
let mut size = 0;
|
||||
let num_elems = elems.len();
|
||||
for (key, value) in elems {
|
||||
size += value.size();
|
||||
self.cache.insert(key, value).await;
|
||||
@@ -440,6 +444,12 @@ impl<H> BoundedStager<H> {
|
||||
|
||||
self.cache.run_pending_tasks().await;
|
||||
|
||||
info!(
|
||||
"Recovered the staging area, num_entries: {}, num_bytes: {}, cost: {:?}",
|
||||
num_elems,
|
||||
size,
|
||||
timer.elapsed()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -537,6 +537,9 @@ impl QueryEngine for DatafusionQueryEngine {
|
||||
}
|
||||
}
|
||||
|
||||
// configure execution options
|
||||
state.config_mut().options_mut().execution.time_zone = query_ctx.timezone().to_string();
|
||||
|
||||
// usually it's impossible to have both `set variable` set by sql client and
|
||||
// hint in header by grpc client, so only need to deal with them separately
|
||||
if query_ctx.configuration_parameter().allow_query_fallback() {
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::debug;
|
||||
@@ -32,10 +32,12 @@ use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
|
||||
use table::metadata::TableType;
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
|
||||
use crate::dist_plan::analyzer::utils::{aliased_columns_for, rewrite_merge_sort_exprs};
|
||||
use crate::dist_plan::commutativity::{
|
||||
partial_commutative_transformer, Categorizer, Commutativity,
|
||||
};
|
||||
use crate::dist_plan::merge_scan::MergeScanLogicalPlan;
|
||||
use crate::dist_plan::merge_sort::MergeSortLogicalPlan;
|
||||
use crate::metrics::PUSH_DOWN_FALLBACK_ERRORS_TOTAL;
|
||||
use crate::plan::ExtractExpr;
|
||||
use crate::query_engine::DefaultSerializer;
|
||||
@@ -46,7 +48,7 @@ mod test;
|
||||
mod fallback;
|
||||
mod utils;
|
||||
|
||||
pub(crate) use utils::{AliasMapping, AliasTracker};
|
||||
pub(crate) use utils::AliasMapping;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DistPlannerOptions {
|
||||
@@ -229,8 +231,7 @@ struct PlanRewriter {
|
||||
stage: Vec<LogicalPlan>,
|
||||
status: RewriterStatus,
|
||||
/// Partition columns of the table in current pass
|
||||
partition_cols: Option<Vec<String>>,
|
||||
alias_tracker: Option<AliasTracker>,
|
||||
partition_cols: Option<AliasMapping>,
|
||||
/// use stack count as scope to determine column requirements is needed or not
|
||||
/// i.e for a logical plan like:
|
||||
/// ```ignore
|
||||
@@ -311,7 +312,7 @@ impl PlanRewriter {
|
||||
}
|
||||
|
||||
if self.expand_on_next_part_cond_trans_commutative {
|
||||
let comm = Categorizer::check_plan(plan, self.get_aliased_partition_columns());
|
||||
let comm = Categorizer::check_plan(plan, self.partition_cols.clone());
|
||||
match comm {
|
||||
Commutativity::PartialCommutative => {
|
||||
// a small difference is that for partial commutative, we still need to
|
||||
@@ -333,7 +334,7 @@ impl PlanRewriter {
|
||||
}
|
||||
}
|
||||
|
||||
match Categorizer::check_plan(plan, self.get_aliased_partition_columns()) {
|
||||
match Categorizer::check_plan(plan, self.partition_cols.clone()) {
|
||||
Commutativity::Commutative => {}
|
||||
Commutativity::PartialCommutative => {
|
||||
if let Some(plan) = partial_commutative_transformer(plan) {
|
||||
@@ -427,49 +428,31 @@ impl PlanRewriter {
|
||||
self.status = RewriterStatus::Unexpanded;
|
||||
}
|
||||
|
||||
/// Maybe update alias for original table columns in the plan
|
||||
fn maybe_update_alias(&mut self, node: &LogicalPlan) {
|
||||
if let Some(alias_tracker) = &mut self.alias_tracker {
|
||||
alias_tracker.update_alias(node);
|
||||
debug!(
|
||||
"Current partition columns are: {:?}",
|
||||
self.get_aliased_partition_columns()
|
||||
);
|
||||
} else if let LogicalPlan::TableScan(table_scan) = node {
|
||||
self.alias_tracker = AliasTracker::new(table_scan);
|
||||
debug!(
|
||||
"Initialize partition columns: {:?} with table={}",
|
||||
self.get_aliased_partition_columns(),
|
||||
table_scan.table_name
|
||||
);
|
||||
}
|
||||
}
|
||||
fn maybe_set_partitions(&mut self, plan: &LogicalPlan) -> DfResult<()> {
|
||||
if let Some(part_cols) = &mut self.partition_cols {
|
||||
// update partition alias
|
||||
let child = plan.inputs().first().cloned().ok_or_else(|| {
|
||||
datafusion_common::DataFusionError::Internal(format!(
|
||||
"PlanRewriter: maybe_set_partitions: plan has no child: {plan}"
|
||||
))
|
||||
})?;
|
||||
|
||||
fn get_aliased_partition_columns(&self) -> Option<AliasMapping> {
|
||||
if let Some(part_cols) = self.partition_cols.as_ref() {
|
||||
let Some(alias_tracker) = &self.alias_tracker else {
|
||||
// no alias tracker meaning no table scan encountered
|
||||
return None;
|
||||
};
|
||||
let mut aliased = HashMap::new();
|
||||
for part_col in part_cols {
|
||||
let all_alias = alias_tracker
|
||||
.get_all_alias_for_col(part_col)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
|
||||
aliased.insert(part_col.clone(), all_alias);
|
||||
for (_col_name, alias_set) in part_cols.iter_mut() {
|
||||
let aliased_cols = aliased_columns_for(
|
||||
&alias_set.clone().into_iter().collect(),
|
||||
plan,
|
||||
Some(child),
|
||||
)?;
|
||||
*alias_set = aliased_cols.into_values().flatten().collect();
|
||||
}
|
||||
Some(aliased)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn maybe_set_partitions(&mut self, plan: &LogicalPlan) {
|
||||
if self.partition_cols.is_some() {
|
||||
// only need to set once
|
||||
return;
|
||||
debug!(
|
||||
"PlanRewriter: maybe_set_partitions: updated partition columns: {:?} at plan: {}",
|
||||
part_cols,
|
||||
plan.display()
|
||||
);
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if let LogicalPlan::TableScan(table_scan) = plan {
|
||||
@@ -509,11 +492,33 @@ impl PlanRewriter {
|
||||
partition_cols
|
||||
.push("__OTHER_PHYSICAL_PART_COLS_PLACEHOLDER__".to_string());
|
||||
}
|
||||
self.partition_cols = Some(partition_cols);
|
||||
self.partition_cols = Some(
|
||||
partition_cols
|
||||
.into_iter()
|
||||
.map(|c| {
|
||||
let index =
|
||||
plan.schema().index_of_column_by_name(None, &c).ok_or_else(|| {
|
||||
datafusion_common::DataFusionError::Internal(
|
||||
format!(
|
||||
"PlanRewriter: maybe_set_partitions: column {c} not found in schema of plan: {plan}"
|
||||
),
|
||||
)
|
||||
})?;
|
||||
let column = plan.schema().columns().get(index).cloned().ok_or_else(|| {
|
||||
datafusion_common::DataFusionError::Internal(format!(
|
||||
"PlanRewriter: maybe_set_partitions: column index {index} out of bounds in schema of plan: {plan}"
|
||||
))
|
||||
})?;
|
||||
Ok((c.clone(), BTreeSet::from([column])))
|
||||
})
|
||||
.collect::<DfResult<AliasMapping>>()?,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// pop one stack item and reduce the level by 1
|
||||
@@ -539,9 +544,14 @@ impl PlanRewriter {
|
||||
"PlanRewriter: after enforced column requirements with rewriter: {rewriter:?} for node:\n{on_node}"
|
||||
);
|
||||
|
||||
debug!(
|
||||
"PlanRewriter: expand on node: {on_node} with partition col alias mapping: {:?}",
|
||||
self.partition_cols
|
||||
);
|
||||
|
||||
// add merge scan as the new root
|
||||
let mut node = MergeScanLogicalPlan::new(
|
||||
on_node,
|
||||
on_node.clone(),
|
||||
false,
|
||||
// at this stage, the partition cols should be set
|
||||
// treat it as non-partitioned if None
|
||||
@@ -551,6 +561,15 @@ impl PlanRewriter {
|
||||
|
||||
// expand stages
|
||||
for new_stage in self.stage.drain(..) {
|
||||
// tracking alias for merge sort's sort exprs
|
||||
let new_stage = if let LogicalPlan::Extension(ext) = &new_stage
|
||||
&& let Some(merge_sort) = ext.node.as_any().downcast_ref::<MergeSortLogicalPlan>()
|
||||
{
|
||||
// TODO(discord9): change `on_node` to `node` once alias tracking is supported for merge scan
|
||||
rewrite_merge_sort_exprs(merge_sort, &on_node)?
|
||||
} else {
|
||||
new_stage
|
||||
};
|
||||
node = new_stage
|
||||
.with_new_exprs(new_stage.expressions_consider_join(), vec![node.clone()])?;
|
||||
}
|
||||
@@ -592,6 +611,7 @@ struct EnforceDistRequirementRewriter {
|
||||
/// when on `Projection` node, we don't need to apply the column requirements of `Aggregate` node
|
||||
/// because the `Projection` node is not in the scope of the `Aggregate` node
|
||||
cur_level: usize,
|
||||
plan_per_level: BTreeMap<usize, LogicalPlan>,
|
||||
}
|
||||
|
||||
impl EnforceDistRequirementRewriter {
|
||||
@@ -599,8 +619,67 @@ impl EnforceDistRequirementRewriter {
|
||||
Self {
|
||||
column_requirements,
|
||||
cur_level,
|
||||
plan_per_level: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a mapping from (original column, level) to aliased columns in current node of all
|
||||
/// applicable column requirements
|
||||
/// i.e. only column requirements with level >= `cur_level` will be considered
|
||||
fn get_current_applicable_column_requirements(
|
||||
&self,
|
||||
node: &LogicalPlan,
|
||||
) -> DfResult<BTreeMap<(Column, usize), BTreeSet<Column>>> {
|
||||
let col_req_per_level = self
|
||||
.column_requirements
|
||||
.iter()
|
||||
.filter(|(_, level)| *level >= self.cur_level)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// track alias for columns and use aliased columns instead
|
||||
// aliased col reqs at current level
|
||||
let mut result_alias_mapping = BTreeMap::new();
|
||||
let Some(child) = node.inputs().first().cloned() else {
|
||||
return Ok(Default::default());
|
||||
};
|
||||
for (col_req, level) in col_req_per_level {
|
||||
if let Some(original) = self.plan_per_level.get(level) {
|
||||
// query for alias in current plan
|
||||
let aliased_cols =
|
||||
aliased_columns_for(&col_req.iter().cloned().collect(), node, Some(original))?;
|
||||
for original_col in col_req {
|
||||
let aliased_cols = aliased_cols.get(original_col).cloned();
|
||||
if let Some(cols) = aliased_cols
|
||||
&& !cols.is_empty()
|
||||
{
|
||||
result_alias_mapping.insert((original_col.clone(), *level), cols);
|
||||
} else {
|
||||
// if no aliased column found in current node, there should be alias in child node as promised by enforce col reqs
|
||||
// because it should insert required columns in child node
|
||||
// so we can find the alias in child node
|
||||
// if not found, it's an internal error
|
||||
let aliases_in_child = aliased_columns_for(
|
||||
&[original_col.clone()].into(),
|
||||
child,
|
||||
Some(original),
|
||||
)?;
|
||||
let Some(aliases) = aliases_in_child
|
||||
.get(original_col)
|
||||
.cloned()
|
||||
.filter(|a| !a.is_empty())
|
||||
else {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"EnforceDistRequirementRewriter: no alias found for required column {original_col} in child plan {child} from original plan {original}",
|
||||
)));
|
||||
};
|
||||
|
||||
result_alias_mapping.insert((original_col.clone(), *level), aliases);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(result_alias_mapping)
|
||||
}
|
||||
}
|
||||
|
||||
impl TreeNodeRewriter for EnforceDistRequirementRewriter {
|
||||
@@ -614,6 +693,7 @@ impl TreeNodeRewriter for EnforceDistRequirementRewriter {
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
self.plan_per_level.insert(self.cur_level, node.clone());
|
||||
self.cur_level += 1;
|
||||
Ok(Transformed::no(node))
|
||||
}
|
||||
@@ -621,38 +701,41 @@ impl TreeNodeRewriter for EnforceDistRequirementRewriter {
|
||||
fn f_up(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
|
||||
self.cur_level -= 1;
|
||||
// first get all applicable column requirements
|
||||
let mut applicable_column_requirements = self
|
||||
.column_requirements
|
||||
.iter()
|
||||
.filter(|(_, level)| *level >= self.cur_level)
|
||||
.map(|(cols, _)| cols.clone())
|
||||
.reduce(|mut acc, cols| {
|
||||
acc.extend(cols);
|
||||
acc
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
debug!(
|
||||
"EnforceDistRequirementRewriter: applicable column requirements at level {} = {:?} for node {}",
|
||||
self.cur_level,
|
||||
applicable_column_requirements,
|
||||
node.display()
|
||||
);
|
||||
|
||||
// make sure all projection applicable scope has the required columns
|
||||
if let LogicalPlan::Projection(ref projection) = node {
|
||||
let mut applicable_column_requirements =
|
||||
self.get_current_applicable_column_requirements(&node)?;
|
||||
|
||||
debug!(
|
||||
"EnforceDistRequirementRewriter: applicable column requirements at level {} = {:?} for node {}",
|
||||
self.cur_level,
|
||||
applicable_column_requirements,
|
||||
node.display()
|
||||
);
|
||||
|
||||
for expr in &projection.expr {
|
||||
let (qualifier, name) = expr.qualified_name();
|
||||
let column = Column::new(qualifier, name);
|
||||
applicable_column_requirements.remove(&column);
|
||||
applicable_column_requirements.retain(|_col_level, alias_set| {
|
||||
// remove all columns that are already in the projection exprs
|
||||
!alias_set.contains(&column)
|
||||
});
|
||||
}
|
||||
if applicable_column_requirements.is_empty() {
|
||||
return Ok(Transformed::no(node));
|
||||
}
|
||||
|
||||
let mut new_exprs = projection.expr.clone();
|
||||
for col in &applicable_column_requirements {
|
||||
new_exprs.push(Expr::Column(col.clone()));
|
||||
for (col, alias_set) in &applicable_column_requirements {
|
||||
// use the first alias in alias set as the column to add
|
||||
new_exprs.push(Expr::Column(alias_set.first().cloned().ok_or_else(
|
||||
|| {
|
||||
datafusion_common::DataFusionError::Internal(
|
||||
format!("EnforceDistRequirementRewriter: alias set is empty, for column {col:?} in node {node}"),
|
||||
)
|
||||
},
|
||||
)?));
|
||||
}
|
||||
let new_node =
|
||||
node.with_new_exprs(new_exprs, node.inputs().into_iter().cloned().collect())?;
|
||||
@@ -661,6 +744,9 @@ impl TreeNodeRewriter for EnforceDistRequirementRewriter {
|
||||
applicable_column_requirements
|
||||
);
|
||||
|
||||
// update plan for later use
|
||||
self.plan_per_level.insert(self.cur_level, new_node.clone());
|
||||
|
||||
// still need to continue for next projection if applicable
|
||||
return Ok(Transformed::yes(new_node));
|
||||
}
|
||||
@@ -679,7 +765,6 @@ impl TreeNodeRewriter for PlanRewriter {
|
||||
self.stage.clear();
|
||||
self.set_unexpanded();
|
||||
self.partition_cols = None;
|
||||
self.alias_tracker = None;
|
||||
Ok(Transformed::no(node))
|
||||
}
|
||||
|
||||
@@ -700,9 +785,7 @@ impl TreeNodeRewriter for PlanRewriter {
|
||||
return Ok(Transformed::no(node));
|
||||
}
|
||||
|
||||
self.maybe_set_partitions(&node);
|
||||
|
||||
self.maybe_update_alias(&node);
|
||||
self.maybe_set_partitions(&node)?;
|
||||
|
||||
let Some(parent) = self.get_parent() else {
|
||||
debug!("Plan Rewriter: expand now for no parent found for node: {node}");
|
||||
|
||||
@@ -17,13 +17,17 @@
|
||||
//! This is a temporary solution, and will be removed once we have a more robust plan rewriter
|
||||
//!
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use common_telemetry::debug;
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
|
||||
use datafusion_common::Result as DfResult;
|
||||
use datafusion_expr::LogicalPlan;
|
||||
use table::metadata::TableType;
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
|
||||
use crate::dist_plan::analyzer::AliasMapping;
|
||||
use crate::dist_plan::MergeScanLogicalPlan;
|
||||
|
||||
/// FallbackPlanRewriter is a plan rewriter that will only push down table scan node
|
||||
@@ -38,9 +42,9 @@ impl TreeNodeRewriter for FallbackPlanRewriter {
|
||||
|
||||
fn f_down(
|
||||
&mut self,
|
||||
node: Self::Node,
|
||||
) -> datafusion_common::Result<datafusion_common::tree_node::Transformed<Self::Node>> {
|
||||
if let LogicalPlan::TableScan(table_scan) = &node {
|
||||
plan: Self::Node,
|
||||
) -> DfResult<datafusion_common::tree_node::Transformed<Self::Node>> {
|
||||
if let LogicalPlan::TableScan(table_scan) = &plan {
|
||||
let partition_cols = if let Some(source) = table_scan
|
||||
.source
|
||||
.as_any()
|
||||
@@ -63,7 +67,25 @@ impl TreeNodeRewriter for FallbackPlanRewriter {
|
||||
"FallbackPlanRewriter: table {} has partition columns: {:?}",
|
||||
info.name, partition_cols
|
||||
);
|
||||
Some(partition_cols)
|
||||
Some(partition_cols
|
||||
.into_iter()
|
||||
.map(|c| {
|
||||
let index =
|
||||
plan.schema().index_of_column_by_name(None, &c).ok_or_else(|| {
|
||||
datafusion_common::DataFusionError::Internal(
|
||||
format!(
|
||||
"PlanRewriter: maybe_set_partitions: column {c} not found in schema of plan: {plan}"
|
||||
),
|
||||
)
|
||||
})?;
|
||||
let column = plan.schema().columns().get(index).cloned().ok_or_else(|| {
|
||||
datafusion_common::DataFusionError::Internal(format!(
|
||||
"PlanRewriter: maybe_set_partitions: column index {index} out of bounds in schema of plan: {plan}"
|
||||
))
|
||||
})?;
|
||||
Ok((c.clone(), BTreeSet::from([column])))
|
||||
})
|
||||
.collect::<DfResult<AliasMapping>>()?)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@@ -74,7 +96,7 @@ impl TreeNodeRewriter for FallbackPlanRewriter {
|
||||
None
|
||||
};
|
||||
let node = MergeScanLogicalPlan::new(
|
||||
node,
|
||||
plan,
|
||||
false,
|
||||
// at this stage, the partition cols should be set
|
||||
// treat it as non-partitioned if None
|
||||
@@ -83,7 +105,7 @@ impl TreeNodeRewriter for FallbackPlanRewriter {
|
||||
.into_logical_plan();
|
||||
Ok(Transformed::yes(node))
|
||||
} else {
|
||||
Ok(Transformed::no(node))
|
||||
Ok(Transformed::no(plan))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -156,7 +156,7 @@ impl Stream for EmptyStream {
|
||||
fn expand_proj_sort_proj() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -199,11 +199,58 @@ fn expand_proj_sort_proj() {
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn expand_proj_sort_partial_proj() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.project(vec![col("number"), col("pk1"), col("pk2"), col("pk3")])
|
||||
.unwrap()
|
||||
.project(vec![
|
||||
col("number"),
|
||||
col("pk1"),
|
||||
col("pk3"),
|
||||
col("pk1").eq(col("pk2")),
|
||||
])
|
||||
.unwrap()
|
||||
.sort(vec![col("t.pk1 = t.pk2").sort(true, true)])
|
||||
.unwrap()
|
||||
.project(vec![col("number"), col("t.pk1 = t.pk2").alias("eq_sorted")])
|
||||
.unwrap()
|
||||
.project(vec![col("number")])
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
|
||||
|
||||
let expected = [
|
||||
"Projection: t.number",
|
||||
" MergeSort: eq_sorted ASC NULLS FIRST", // notice how `eq_sorted` is used here
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"Projection: t.number, eq_sorted", // notice how `eq_sorted` is added not `t.pk1 = t.pk2`
|
||||
" Projection: t.number, t.pk1 = t.pk2 AS eq_sorted",
|
||||
" Sort: t.pk1 = t.pk2 ASC NULLS FIRST",
|
||||
" Projection: t.number, t.pk1, t.pk3, t.pk1 = t.pk2",
|
||||
" Projection: t.number, t.pk1, t.pk2, t.pk3", // notice this projection doesn't add `t.pk1 = t.pk2` column requirement
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
]
|
||||
.join("\n");
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn expand_sort_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -233,11 +280,13 @@ fn expand_sort_limit() {
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
/// Test that merge sort can apply the enforced dist requirement columns and use the aliased column correctly; as there is
/// an aliased sort column, there is no need to add a duplicate sort column using its original column name
|
||||
#[test]
|
||||
fn expand_sort_alias_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -258,10 +307,10 @@ fn expand_sort_alias_limit() {
|
||||
let expected = [
|
||||
"Projection: something",
|
||||
" Limit: skip=0, fetch=10",
|
||||
" MergeSort: t.pk1 ASC NULLS LAST",
|
||||
" MergeSort: something ASC NULLS LAST",
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"Limit: skip=0, fetch=10",
|
||||
" Projection: t.pk1 AS something, t.pk1",
|
||||
" Projection: t.pk1 AS something",
|
||||
" Sort: t.pk1 ASC NULLS LAST",
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
@@ -276,7 +325,7 @@ fn expand_sort_alias_limit() {
|
||||
fn expand_sort_alias_conflict_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -318,7 +367,7 @@ fn expand_sort_alias_conflict_limit() {
|
||||
fn expand_sort_alias_conflict_but_not_really_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -358,7 +407,7 @@ fn expand_sort_alias_conflict_but_not_really_limit() {
|
||||
fn expand_limit_sort() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -391,7 +440,7 @@ fn expand_limit_sort() {
|
||||
fn expand_sort_limit_sort() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -438,7 +487,7 @@ fn expand_sort_limit_sort() {
|
||||
fn expand_proj_step_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -473,7 +522,7 @@ fn expand_proj_step_aggr() {
|
||||
fn expand_proj_alias_fake_part_col_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -517,7 +566,7 @@ fn expand_proj_alias_fake_part_col_aggr() {
|
||||
fn expand_proj_alias_aliased_part_col_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -563,7 +612,7 @@ fn expand_proj_alias_aliased_part_col_aggr() {
|
||||
fn expand_part_col_aggr_step_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -596,7 +645,7 @@ fn expand_part_col_aggr_step_aggr() {
|
||||
fn expand_step_aggr_step_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -629,7 +678,7 @@ fn expand_step_aggr_step_aggr() {
|
||||
fn expand_part_col_aggr_part_col_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -673,7 +722,7 @@ fn expand_part_col_aggr_part_col_aggr() {
|
||||
fn expand_step_aggr_proj() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -709,7 +758,7 @@ fn expand_step_aggr_proj() {
|
||||
fn expand_proj_sort_step_aggr_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -750,7 +799,7 @@ fn expand_proj_sort_step_aggr_limit() {
|
||||
fn expand_proj_sort_limit_step_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -792,7 +841,7 @@ fn expand_proj_sort_limit_step_aggr() {
|
||||
fn expand_proj_limit_step_aggr_sort() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -833,7 +882,7 @@ fn expand_proj_limit_step_aggr_sort() {
|
||||
fn expand_proj_sort_part_col_aggr_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -875,7 +924,7 @@ fn expand_proj_sort_part_col_aggr_limit() {
|
||||
fn expand_proj_sort_limit_part_col_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -917,7 +966,7 @@ fn expand_proj_sort_limit_part_col_aggr() {
|
||||
fn expand_proj_part_col_aggr_limit_sort() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -959,7 +1008,7 @@ fn expand_proj_part_col_aggr_limit_sort() {
|
||||
fn expand_proj_part_col_aggr_sort_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -1002,7 +1051,7 @@ fn expand_proj_part_col_aggr_sort_limit() {
|
||||
fn expand_proj_limit_part_col_aggr_sort() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -1044,7 +1093,7 @@ fn expand_proj_limit_part_col_aggr_sort() {
|
||||
fn expand_proj_limit_sort_part_col_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -1087,7 +1136,7 @@ fn expand_proj_limit_sort_part_col_aggr() {
|
||||
fn expand_step_aggr_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -1120,7 +1169,7 @@ fn expand_step_aggr_limit() {
|
||||
fn expand_step_aggr_avg_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -1153,7 +1202,7 @@ fn expand_step_aggr_avg_limit() {
|
||||
fn expand_part_col_aggr_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -1332,10 +1381,73 @@ fn transform_unalighed_join_with_alias() {
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"TableScan: t",
|
||||
"]]",
|
||||
" SubqueryAlias: right",
|
||||
" Projection: t.number",
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"TableScan: t",
|
||||
" Projection: right.number",
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"SubqueryAlias: right",
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
]
|
||||
.join("\n");
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn transform_subquery_sort_alias() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.alias("a")
|
||||
.unwrap()
|
||||
.sort(vec![col("a.number").sort(true, false)])
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
|
||||
let expected = [
|
||||
"Projection: a.pk1, a.pk2, a.pk3, a.ts, a.number",
|
||||
" MergeSort: a.number ASC NULLS LAST",
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"Sort: a.number ASC NULLS LAST",
|
||||
" SubqueryAlias: a",
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
]
|
||||
.join("\n");
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn transform_sort_subquery_alias() {
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.sort(vec![col("t.number").sort(true, false)])
|
||||
.unwrap()
|
||||
.alias("a")
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
|
||||
let expected = [
|
||||
"Projection: a.pk1, a.pk2, a.pk3, a.ts, a.number",
|
||||
" MergeSort: a.number ASC NULLS LAST",
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"SubqueryAlias: a",
|
||||
" Sort: t.number ASC NULLS LAST",
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
]
|
||||
.join("\n");
|
||||
|
||||
File diff suppressed because it is too large
@@ -187,7 +187,7 @@ impl Categorizer {
|
||||
LogicalPlan::TableScan(_) => Commutativity::Commutative,
|
||||
LogicalPlan::EmptyRelation(_) => Commutativity::NonCommutative,
|
||||
LogicalPlan::Subquery(_) => Commutativity::Unimplemented,
|
||||
LogicalPlan::SubqueryAlias(_) => Commutativity::Unimplemented,
|
||||
LogicalPlan::SubqueryAlias(_) => Commutativity::Commutative,
|
||||
LogicalPlan::Limit(limit) => {
|
||||
// Only execute `fetch` on remote nodes.
|
||||
// wait for https://github.com/apache/arrow-datafusion/pull/7669
|
||||
@@ -302,6 +302,10 @@ impl Categorizer {
|
||||
|
||||
/// Return true if the given expr and partition cols satisfied the rule.
|
||||
/// In this case the plan can be treated as fully commutative.
|
||||
///
|
||||
/// So only if all partition columns show up in `exprs`, return true.
|
||||
/// Otherwise return false.
|
||||
///
|
||||
fn check_partition(exprs: &[Expr], partition_cols: &AliasMapping) -> bool {
|
||||
let mut ref_cols = HashSet::new();
|
||||
for expr in exprs {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.