Compare commits

..

2 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| WenyXu | 07b2ea096b | feat(standalone): support to dump/restore metadata | 2025-04-20 08:13:35 +00:00 |
| WenyXu | d55d9addf2 | feat: introduce MetadataSnapshotManager | 2025-04-20 06:32:56 +00:00 |
139 changed files with 13466 additions and 19116 deletions


@@ -7,8 +7,7 @@ meta:
provider = "kafka"
broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
num_topics = 3
auto_prune_interval = "30s"
trigger_flush_threshold = 100
auto_prune_topic_records = true
[datanode]
[datanode.client]
@@ -23,7 +22,6 @@ datanode:
provider = "kafka"
broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
linger = "2ms"
overwrite_entry_start_id = true
frontend:
configData: |-
[runtime]


@@ -21,6 +21,32 @@ jobs:
run: sudo apt-get install -y jq
# Make the check.sh script executable
- name: Check grafana dashboards
- name: Make check.sh executable
run: chmod +x grafana/check.sh
# Run the check.sh script
- name: Run check.sh
run: ./grafana/check.sh
# Only run summary.sh for pull_request events (not for merge queues or final pushes)
- name: Check if this is a pull request
id: check-pr
run: |
make check-dashboards
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "is_pull_request=true" >> $GITHUB_OUTPUT
else
echo "is_pull_request=false" >> $GITHUB_OUTPUT
fi
# Make the summary.sh script executable
- name: Make summary.sh executable
if: steps.check-pr.outputs.is_pull_request == 'true'
run: chmod +x grafana/summary.sh
# Run the summary.sh script and add its output to the GitHub Job Summary
- name: Run summary.sh and add to Job Summary
if: steps.check-pr.outputs.is_pull_request == 'true'
run: |
SUMMARY=$(./grafana/summary.sh)
echo "### Summary of Grafana Panels" >> $GITHUB_STEP_SUMMARY
echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY

Cargo.lock generated (512 lines changed)

@@ -266,61 +266,25 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "arrow"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3a3ec4fe573f9d1f59d99c085197ef669b00b088ba1d7bb75224732d9357a74"
dependencies = [
"arrow-arith 53.4.1",
"arrow-array 53.4.1",
"arrow-buffer 53.4.1",
"arrow-cast 53.4.1",
"arrow-csv 53.4.1",
"arrow-data 53.4.1",
"arrow-ipc 53.4.1",
"arrow-json 53.4.1",
"arrow-ord 53.4.1",
"arrow-row 53.4.1",
"arrow-schema 53.4.1",
"arrow-select 53.4.1",
"arrow-string 53.4.1",
]
[[package]]
name = "arrow"
version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc208515aa0151028e464cc94a692156e945ce5126abd3537bb7fd6ba2143ed1"
dependencies = [
"arrow-arith 54.2.1",
"arrow-array 54.2.1",
"arrow-buffer 54.3.1",
"arrow-cast 54.2.1",
"arrow-csv 54.2.1",
"arrow-data 54.3.1",
"arrow-ipc 54.2.1",
"arrow-json 54.2.1",
"arrow-ord 54.2.1",
"arrow-row 54.2.1",
"arrow-schema 54.3.1",
"arrow-select 54.2.1",
"arrow-string 54.2.1",
]
[[package]]
name = "arrow-arith"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dcf19f07792d8c7f91086c67b574a79301e367029b17fcf63fb854332246a10"
dependencies = [
"arrow-array 53.4.1",
"arrow-buffer 53.4.1",
"arrow-data 53.4.1",
"arrow-schema 53.4.1",
"chrono",
"half",
"num",
"arrow-arith",
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-csv",
"arrow-data",
"arrow-ipc",
"arrow-json",
"arrow-ord",
"arrow-row",
"arrow-schema",
"arrow-select",
"arrow-string",
]
[[package]]
@@ -329,30 +293,14 @@ version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e07e726e2b3f7816a85c6a45b6ec118eeeabf0b2a8c208122ad949437181f49a"
dependencies = [
"arrow-array 54.2.1",
"arrow-buffer 54.3.1",
"arrow-data 54.3.1",
"arrow-schema 54.3.1",
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"chrono",
"num",
]
[[package]]
name = "arrow-array"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7845c32b41f7053e37a075b3c2f29c6f5ea1b3ca6e5df7a2d325ee6e1b4a63cf"
dependencies = [
"ahash 0.8.11",
"arrow-buffer 53.4.1",
"arrow-data 53.4.1",
"arrow-schema 53.4.1",
"chrono",
"half",
"hashbrown 0.15.2",
"num",
]
[[package]]
name = "arrow-array"
version = "54.2.1"
@@ -360,9 +308,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2262eba4f16c78496adfd559a29fe4b24df6088efc9985a873d58e92be022d5"
dependencies = [
"ahash 0.8.11",
"arrow-buffer 54.3.1",
"arrow-data 54.3.1",
"arrow-schema 54.3.1",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"chrono",
"chrono-tz",
"half",
@@ -370,17 +318,6 @@ dependencies = [
"num",
]
[[package]]
name = "arrow-buffer"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b5c681a99606f3316f2a99d9c8b6fa3aad0b1d34d8f6d7a1b471893940219d8"
dependencies = [
"bytes",
"half",
"num",
]
[[package]]
name = "arrow-buffer"
version = "54.3.1"
@@ -392,37 +329,17 @@ dependencies = [
"num",
]
[[package]]
name = "arrow-cast"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6365f8527d4f87b133eeb862f9b8093c009d41a210b8f101f91aa2392f61daac"
dependencies = [
"arrow-array 53.4.1",
"arrow-buffer 53.4.1",
"arrow-data 53.4.1",
"arrow-schema 53.4.1",
"arrow-select 53.4.1",
"atoi",
"base64 0.22.1",
"chrono",
"half",
"lexical-core",
"num",
"ryu",
]
[[package]]
name = "arrow-cast"
version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4103d88c5b441525ed4ac23153be7458494c2b0c9a11115848fdb9b81f6f886a"
dependencies = [
"arrow-array 54.2.1",
"arrow-buffer 54.3.1",
"arrow-data 54.3.1",
"arrow-schema 54.3.1",
"arrow-select 54.2.1",
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"arrow-select",
"atoi",
"base64 0.22.1",
"chrono",
@@ -433,34 +350,15 @@ dependencies = [
"ryu",
]
[[package]]
name = "arrow-csv"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30dac4d23ac769300349197b845e0fd18c7f9f15d260d4659ae6b5a9ca06f586"
dependencies = [
"arrow-array 53.4.1",
"arrow-buffer 53.4.1",
"arrow-cast 53.4.1",
"arrow-data 53.4.1",
"arrow-schema 53.4.1",
"chrono",
"csv",
"csv-core",
"lazy_static",
"lexical-core",
"regex",
]
[[package]]
name = "arrow-csv"
version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43d3cb0914486a3cae19a5cad2598e44e225d53157926d0ada03c20521191a65"
dependencies = [
"arrow-array 54.2.1",
"arrow-cast 54.2.1",
"arrow-schema 54.3.1",
"arrow-array",
"arrow-cast",
"arrow-schema",
"chrono",
"csv",
"csv-core",
@@ -468,26 +366,14 @@ dependencies = [
"regex",
]
[[package]]
name = "arrow-data"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd962fc3bf7f60705b25bcaa8eb3318b2545aa1d528656525ebdd6a17a6cd6fb"
dependencies = [
"arrow-buffer 53.4.1",
"arrow-schema 53.4.1",
"half",
"num",
]
[[package]]
name = "arrow-data"
version = "54.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61cfdd7d99b4ff618f167e548b2411e5dd2c98c0ddebedd7df433d34c20a4429"
dependencies = [
"arrow-buffer 54.3.1",
"arrow-schema 54.3.1",
"arrow-buffer",
"arrow-schema",
"half",
"num",
]
@@ -498,11 +384,11 @@ version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7408f2bf3b978eddda272c7699f439760ebc4ac70feca25fefa82c5b8ce808d"
dependencies = [
"arrow-array 54.2.1",
"arrow-buffer 54.3.1",
"arrow-cast 54.2.1",
"arrow-ipc 54.2.1",
"arrow-schema 54.3.1",
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-ipc",
"arrow-schema",
"base64 0.22.1",
"bytes",
"futures",
@@ -511,67 +397,32 @@ dependencies = [
"tonic 0.12.3",
]
[[package]]
name = "arrow-ipc"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3527365b24372f9c948f16e53738eb098720eea2093ae73c7af04ac5e30a39b"
dependencies = [
"arrow-array 53.4.1",
"arrow-buffer 53.4.1",
"arrow-cast 53.4.1",
"arrow-data 53.4.1",
"arrow-schema 53.4.1",
"flatbuffers",
"zstd 0.13.2",
]
[[package]]
name = "arrow-ipc"
version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddecdeab02491b1ce88885986e25002a3da34dd349f682c7cfe67bab7cc17b86"
dependencies = [
"arrow-array 54.2.1",
"arrow-buffer 54.3.1",
"arrow-data 54.3.1",
"arrow-schema 54.3.1",
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"flatbuffers",
"lz4_flex",
"zstd 0.13.2",
]
[[package]]
name = "arrow-json"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acdec0024749fc0d95e025c0b0266d78613727b3b3a5d4cf8ea47eb6d38afdd1"
dependencies = [
"arrow-array 53.4.1",
"arrow-buffer 53.4.1",
"arrow-cast 53.4.1",
"arrow-data 53.4.1",
"arrow-schema 53.4.1",
"chrono",
"half",
"indexmap 2.9.0",
"lexical-core",
"num",
"serde",
"serde_json",
]
[[package]]
name = "arrow-json"
version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d03b9340013413eb84868682ace00a1098c81a5ebc96d279f7ebf9a4cac3c0fd"
dependencies = [
"arrow-array 54.2.1",
"arrow-buffer 54.3.1",
"arrow-cast 54.2.1",
"arrow-data 54.3.1",
"arrow-schema 54.3.1",
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-data",
"arrow-schema",
"chrono",
"half",
"indexmap 2.9.0",
@@ -581,46 +432,17 @@ dependencies = [
"serde_json",
]
[[package]]
name = "arrow-ord"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79af2db0e62a508d34ddf4f76bfd6109b6ecc845257c9cba6f939653668f89ac"
dependencies = [
"arrow-array 53.4.1",
"arrow-buffer 53.4.1",
"arrow-data 53.4.1",
"arrow-schema 53.4.1",
"arrow-select 53.4.1",
"half",
"num",
]
[[package]]
name = "arrow-ord"
version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f841bfcc1997ef6ac48ee0305c4dfceb1f7c786fe31e67c1186edf775e1f1160"
dependencies = [
"arrow-array 54.2.1",
"arrow-buffer 54.3.1",
"arrow-data 54.3.1",
"arrow-schema 54.3.1",
"arrow-select 54.2.1",
]
[[package]]
name = "arrow-row"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da30e9d10e9c52f09ea0cf15086d6d785c11ae8dcc3ea5f16d402221b6ac7735"
dependencies = [
"ahash 0.8.11",
"arrow-array 53.4.1",
"arrow-buffer 53.4.1",
"arrow-data 53.4.1",
"arrow-schema 53.4.1",
"half",
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"arrow-select",
]
[[package]]
@@ -629,19 +451,13 @@ version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1eeb55b0a0a83851aa01f2ca5ee5648f607e8506ba6802577afdda9d75cdedcd"
dependencies = [
"arrow-array 54.2.1",
"arrow-buffer 54.3.1",
"arrow-data 54.3.1",
"arrow-schema 54.3.1",
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"half",
]
[[package]]
name = "arrow-schema"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35b0f9c0c3582dd55db0f136d3b44bfa0189df07adcf7dc7f2f2e74db0f52eb8"
[[package]]
name = "arrow-schema"
version = "54.3.1"
@@ -651,20 +467,6 @@ dependencies = [
"serde",
]
[[package]]
name = "arrow-select"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92fc337f01635218493c23da81a364daf38c694b05fc20569c3193c11c561984"
dependencies = [
"ahash 0.8.11",
"arrow-array 53.4.1",
"arrow-buffer 53.4.1",
"arrow-data 53.4.1",
"arrow-schema 53.4.1",
"num",
]
[[package]]
name = "arrow-select"
version = "54.2.1"
@@ -672,41 +474,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e2932aece2d0c869dd2125feb9bd1709ef5c445daa3838ac4112dcfa0fda52c"
dependencies = [
"ahash 0.8.11",
"arrow-array 54.2.1",
"arrow-buffer 54.3.1",
"arrow-data 54.3.1",
"arrow-schema 54.3.1",
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"num",
]
[[package]]
name = "arrow-string"
version = "53.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d596a9fc25dae556672d5069b090331aca8acb93cae426d8b7dcdf1c558fa0ce"
dependencies = [
"arrow-array 53.4.1",
"arrow-buffer 53.4.1",
"arrow-data 53.4.1",
"arrow-schema 53.4.1",
"arrow-select 53.4.1",
"memchr",
"num",
"regex",
"regex-syntax 0.8.5",
]
[[package]]
name = "arrow-string"
version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "912e38bd6a7a7714c1d9b61df80315685553b7455e8a6045c27531d8ecd5b458"
dependencies = [
"arrow-array 54.2.1",
"arrow-buffer 54.3.1",
"arrow-data 54.3.1",
"arrow-schema 54.3.1",
"arrow-select 54.2.1",
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"arrow-select",
"memchr",
"num",
"regex",
@@ -1564,8 +1349,8 @@ name = "catalog"
version = "0.14.0"
dependencies = [
"api",
"arrow 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-schema",
"async-stream",
"async-trait",
"bytes",
@@ -2155,8 +1940,8 @@ dependencies = [
name = "common-datasource"
version = "0.14.0"
dependencies = [
"arrow 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-schema",
"async-compression 0.3.15",
"async-trait",
"bytes",
@@ -2392,6 +2177,7 @@ dependencies = [
"common-query",
"common-recordbatch",
"common-telemetry",
"common-test-util",
"common-time",
"common-wal",
"datafusion-common",
@@ -2401,6 +2187,7 @@ dependencies = [
"deadpool-postgres",
"derive_builder 0.20.1",
"etcd-client",
"flexbuffers",
"futures",
"futures-util",
"hex",
@@ -2409,6 +2196,7 @@ dependencies = [
"itertools 0.14.0",
"lazy_static",
"moka",
"object-store",
"prometheus",
"prost 0.13.5",
"rand 0.9.0",
@@ -2618,7 +2406,7 @@ dependencies = [
name = "common-time"
version = "0.14.0"
dependencies = [
"arrow 54.2.1",
"arrow",
"chrono",
"chrono-tz",
"common-error",
@@ -3119,10 +2907,10 @@ name = "datafusion"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow 54.2.1",
"arrow-array 54.2.1",
"arrow-ipc 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-array",
"arrow-ipc",
"arrow-schema",
"async-compression 0.4.13",
"async-trait",
"bytes",
@@ -3170,7 +2958,7 @@ name = "datafusion-catalog"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow 54.2.1",
"arrow",
"async-trait",
"dashmap",
"datafusion-common",
@@ -3190,8 +2978,8 @@ name = "datafusion-catalog-listing"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-schema",
"chrono",
"datafusion-catalog",
"datafusion-common",
@@ -3214,10 +3002,10 @@ version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"ahash 0.8.11",
"arrow 54.2.1",
"arrow-array 54.2.1",
"arrow-ipc 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-array",
"arrow-ipc",
"arrow-schema",
"base64 0.22.1",
"half",
"hashbrown 0.14.5",
@@ -3252,7 +3040,7 @@ name = "datafusion-execution"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow 54.2.1",
"arrow",
"dashmap",
"datafusion-common",
"datafusion-expr",
@@ -3270,7 +3058,7 @@ name = "datafusion-expr"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow 54.2.1",
"arrow",
"chrono",
"datafusion-common",
"datafusion-doc",
@@ -3290,7 +3078,7 @@ name = "datafusion-expr-common"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow 54.2.1",
"arrow",
"datafusion-common",
"itertools 0.14.0",
"paste",
@@ -3301,8 +3089,8 @@ name = "datafusion-functions"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow 54.2.1",
"arrow-buffer 54.3.1",
"arrow",
"arrow-buffer",
"base64 0.22.1",
"blake2",
"blake3",
@@ -3331,8 +3119,8 @@ version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"ahash 0.8.11",
"arrow 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-schema",
"datafusion-common",
"datafusion-doc",
"datafusion-execution",
@@ -3352,7 +3140,7 @@ version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"ahash 0.8.11",
"arrow 54.2.1",
"arrow",
"datafusion-common",
"datafusion-expr-common",
"datafusion-physical-expr-common",
@@ -3363,10 +3151,10 @@ name = "datafusion-functions-nested"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow 54.2.1",
"arrow-array 54.2.1",
"arrow-ord 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-array",
"arrow-ord",
"arrow-schema",
"datafusion-common",
"datafusion-doc",
"datafusion-execution",
@@ -3385,7 +3173,7 @@ name = "datafusion-functions-table"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow 54.2.1",
"arrow",
"async-trait",
"datafusion-catalog",
"datafusion-common",
@@ -3435,7 +3223,7 @@ name = "datafusion-optimizer"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow 54.2.1",
"arrow",
"chrono",
"datafusion-common",
"datafusion-expr",
@@ -3454,9 +3242,9 @@ version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"ahash 0.8.11",
"arrow 54.2.1",
"arrow-array 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-array",
"arrow-schema",
"datafusion-common",
"datafusion-expr",
"datafusion-expr-common",
@@ -3477,7 +3265,7 @@ version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"ahash 0.8.11",
"arrow 54.2.1",
"arrow",
"datafusion-common",
"datafusion-expr-common",
"hashbrown 0.14.5",
@@ -3489,8 +3277,8 @@ name = "datafusion-physical-optimizer"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-schema",
"datafusion-common",
"datafusion-execution",
"datafusion-expr",
@@ -3511,10 +3299,10 @@ version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"ahash 0.8.11",
"arrow 54.2.1",
"arrow-array 54.2.1",
"arrow-ord 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-array",
"arrow-ord",
"arrow-schema",
"async-trait",
"chrono",
"datafusion-common",
@@ -3540,9 +3328,9 @@ name = "datafusion-sql"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow 54.2.1",
"arrow-array 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-array",
"arrow-schema",
"bigdecimal 0.4.8",
"datafusion-common",
"datafusion-expr",
@@ -3635,9 +3423,9 @@ dependencies = [
name = "datatypes"
version = "0.14.0"
dependencies = [
"arrow 54.2.1",
"arrow-array 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-array",
"arrow-schema",
"base64 0.22.1",
"common-base",
"common-decimal",
@@ -4350,6 +4138,19 @@ dependencies = [
"miniz_oxide",
]
[[package]]
name = "flexbuffers"
version = "25.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "935627e7bc8f083035d9faad09ffaed9128f73fb1f74a8798f115749c43378e8"
dependencies = [
"bitflags 1.3.2",
"byteorder",
"num_enum",
"serde",
"serde_derive",
]
[[package]]
name = "float-cmp"
version = "0.10.0"
@@ -4385,8 +4186,8 @@ name = "flow"
version = "0.14.0"
dependencies = [
"api",
"arrow 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-schema",
"async-recursion",
"async-trait",
"bytes",
@@ -4505,7 +4306,6 @@ dependencies = [
"arc-swap",
"async-trait",
"auth",
"bytes",
"cache",
"catalog",
"client",
@@ -4540,7 +4340,6 @@ dependencies = [
"num_cpus",
"opentelemetry-proto 0.27.0",
"operator",
"otel-arrow-rust",
"partition",
"pipeline",
"prometheus",
@@ -4944,7 +4743,7 @@ dependencies = [
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=e82b0158cd38d4021edb4e4c0ae77f999051e62f#e82b0158cd38d4021edb4e4c0ae77f999051e62f"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=b6d9cffd43c4e6358805a798f17e03e232994b82#b6d9cffd43c4e6358805a798f17e03e232994b82"
dependencies = [
"prost 0.13.5",
"serde",
@@ -7765,23 +7564,23 @@ dependencies = [
[[package]]
name = "num_enum"
version = "0.7.3"
version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e613fc340b2220f734a8595782c551f1250e969d87d3be1ae0579e8d4065179"
checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9"
dependencies = [
"num_enum_derive",
]
[[package]]
name = "num_enum_derive"
version = "0.7.3"
version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56"
checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799"
dependencies = [
"proc-macro-crate 1.3.1",
"proc-macro2",
"quote",
"syn 2.0.100",
"syn 1.0.109",
]
[[package]]
@@ -8178,7 +7977,7 @@ name = "orc-rust"
version = "0.6.0"
source = "git+https://github.com/datafusion-contrib/orc-rust?rev=3134cab581a8e91b942d6a23aca2916ea965f6bb#3134cab581a8e91b942d6a23aca2916ea965f6bb"
dependencies = [
"arrow 54.2.1",
"arrow",
"async-trait",
"bytemuck",
"bytes",
@@ -8264,24 +8063,6 @@ version = "6.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1"
[[package]]
name = "otel-arrow-rust"
version = "0.1.0"
source = "git+https://github.com/open-telemetry/otel-arrow?rev=5d551412d2a12e689cde4d84c14ef29e36784e51#5d551412d2a12e689cde4d84c14ef29e36784e51"
dependencies = [
"arrow 53.4.1",
"arrow-ipc 53.4.1",
"lazy_static",
"num_enum",
"opentelemetry-proto 0.27.0",
"paste",
"prost 0.13.5",
"serde",
"snafu 0.8.5",
"tonic 0.12.3",
"tonic-build 0.12.3",
]
[[package]]
name = "overload"
version = "0.1.1"
@@ -8380,13 +8161,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f88838dca3b84d41444a0341b19f347e8098a3898b0f21536654b8b799e11abd"
dependencies = [
"ahash 0.8.11",
"arrow-array 54.2.1",
"arrow-buffer 54.3.1",
"arrow-cast 54.2.1",
"arrow-data 54.3.1",
"arrow-ipc 54.2.1",
"arrow-schema 54.3.1",
"arrow-select 54.2.1",
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-data",
"arrow-ipc",
"arrow-schema",
"arrow-select",
"base64 0.22.1",
"brotli",
"bytes",
@@ -8707,7 +8488,7 @@ version = "0.14.0"
dependencies = [
"ahash 0.8.11",
"api",
"arrow 54.2.1",
"arrow",
"async-trait",
"catalog",
"chrono",
@@ -9417,8 +9198,8 @@ dependencies = [
"ahash 0.8.11",
"api",
"arc-swap",
"arrow 54.2.1",
"arrow-schema 54.3.1",
"arrow",
"arrow-schema",
"async-recursion",
"async-stream",
"async-trait",
@@ -10833,10 +10614,10 @@ version = "0.14.0"
dependencies = [
"ahash 0.8.11",
"api",
"arrow 54.2.1",
"arrow",
"arrow-flight",
"arrow-ipc 54.2.1",
"arrow-schema 54.3.1",
"arrow-ipc",
"arrow-schema",
"async-trait",
"auth",
"axum 0.8.1",
@@ -10899,7 +10680,6 @@ dependencies = [
"openmetrics-parser",
"opensrv-mysql",
"opentelemetry-proto 0.27.0",
"otel-arrow-rust",
"parking_lot 0.12.3",
"permutation",
"pgwire",


@@ -129,7 +129,7 @@ etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "e82b0158cd38d4021edb4e4c0ae77f999051e62f" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "b6d9cffd43c4e6358805a798f17e03e232994b82" }
hex = "0.4"
http = "1"
humantime = "2.1"
@@ -269,9 +269,6 @@ metric-engine = { path = "src/metric-engine" }
mito2 = { path = "src/mito2" }
object-store = { path = "src/object-store" }
operator = { path = "src/operator" }
otel-arrow-rust = { git = "https://github.com/open-telemetry/otel-arrow", rev = "5d551412d2a12e689cde4d84c14ef29e36784e51", features = [
"server",
] }
partition = { path = "src/partition" }
pipeline = { path = "src/pipeline" }
plugins = { path = "src/plugins" }


@@ -222,16 +222,6 @@ start-cluster: ## Start the greptimedb cluster with etcd by using docker compose
stop-cluster: ## Stop the greptimedb cluster that was created by docker compose.
docker compose -f ./docker/docker-compose/cluster-with-etcd.yaml stop
##@ Grafana
.PHONY: check-dashboards
check-dashboards: ## Check the Grafana dashboards.
@./grafana/scripts/check.sh
.PHONY: dashboards
dashboards: ## Generate the Grafana dashboards for standalone mode and intermediate dashboards.
@./grafana/scripts/gen-dashboards.sh
##@ Docs
config-docs: ## Generate configuration documentation from toml files.
docker run --rm \


@@ -6,7 +6,7 @@
</picture>
</p>
<h2 align="center">Real-Time & Cloud-Native Observability Database<br/>for metrics, logs, and traces</h2>
<h2 align="center">Unified & Cost-Effective Observability Database for Metrics, Logs, and Events</h2>
<div align="center">
<h3 align="center">


@@ -1,83 +1,61 @@
# Grafana dashboards for GreptimeDB
Grafana dashboard for GreptimeDB
--------------------------------
## Overview
GreptimeDB's official Grafana dashboard.
This repository maintains the Grafana dashboards for GreptimeDB. It has two types of dashboards:
Status note: we are still working on this config. It is expected to change frequently in the near future. Please feel free to submit your feedback and/or contributions to this dashboard 🤗
- `cluster/`: The dashboard for the GreptimeDB cluster. Read the [dashboard.md](./dashboards/cluster/dashboard.md) for more details.
- `standalone/`: The dashboard for the standalone GreptimeDB instance. Read the [dashboard.md](./dashboards/standalone/dashboard.md) for more details.
As GreptimeDB is under rapid development, the metrics may change; please feel free to submit your feedback and/or contributions to this dashboard 🤗
To maintain the dashboards, we use the [`dac`](https://github.com/zyy17/dac) tool to generate the intermediate dashboards and markdown documents:
- `cluster/dashboard.yaml`: The intermediate dashboard for the GreptimeDB cluster.
- `standalone/dashboard.yaml`: The intermediate dashboard for the standalone GreptimeDB instance.
## Data Sources
There are two data sources for the dashboards to fetch the metrics:
- **Prometheus**: Exposes the metrics of GreptimeDB.
- **Information Schema**: Accessed through the MySQL port of the monitored instance. The `overview` dashboard uses this data source to show the information schema of the current instance.
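For illustration, a minimal provisioning sketch of these two data sources. The names match the `${metrics}` and `${information_schema}` variables used in the dashboards; the URLs and ports are assumptions to adapt to your deployment.
```yml
# Hedged sketch of a Grafana data source provisioning file.
# URLs/ports below are placeholders, not part of this repository.
apiVersion: 1
datasources:
  - name: metrics
    type: prometheus
    access: proxy
    url: http://prometheus-server:9090     # the Prometheus that scrapes GreptimeDB
  - name: information_schema
    type: mysql
    url: greptimedb-frontend:4002          # MySQL port of the monitored instance
    database: information_schema
```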
## Instance Filters
To deploy the dashboards for multiple scenarios (K8s, bare metal, etc.), we prefer to use the `instance` label when filtering instances.
Additionally, we recommend including the `pod` label in the legend to make it easier to identify each instance, even though this field will be empty in bare metal scenarios.
For example, the following query is recommended:
```promql
sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)
```
And the legend will be like: `[{{instance}}]-[{{ pod }}]`.
## Deployment
### Helm
If you use the Helm [chart](https://github.com/GreptimeTeam/helm-charts) to deploy a GreptimeDB cluster, you can enable self-monitoring by setting the following values in your Helm chart:
If you use Helm [chart](https://github.com/GreptimeTeam/helm-charts) to deploy GreptimeDB cluster, you can enable self-monitoring by setting the following values in your Helm chart:
- `monitoring.enabled=true`: Deploys a standalone GreptimeDB instance dedicated to monitoring the cluster;
- `grafana.enabled=true`: Deploys Grafana and automatically imports the monitoring dashboard;
The standalone GreptimeDB instance will collect metrics from your cluster, and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/nightly/user-guide/deployments/deploy-on-kubernetes/getting-started).
The standalone GreptimeDB instance will collect metrics from your cluster and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/nightly/user-guide/deployments/deploy-on-kubernetes/getting-started).
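For reference, a minimal sketch of the corresponding values: only the two switches mentioned above come from this README; any surrounding structure in your values file is up to your chart configuration.
```yml
# Hedged sketch: enable self-monitoring and Grafana in the Helm chart values.
monitoring:
  enabled: true   # deploys a standalone GreptimeDB instance for monitoring
grafana:
  enabled: true   # deploys Grafana and imports the monitoring dashboard
```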
### Self-host Prometheus and import dashboards manually
# How to use
1. **Configure Prometheus to scrape the cluster**
## `greptimedb.json`
The following is an example configuration (**please modify it according to your actual situation**):
Open Grafana Dashboard page, choose `New` -> `Import`. And upload `greptimedb.json` file.
```yml
# example config
# only to indicate how to assign labels to each target
# modify yours accordingly
scrape_configs:
- job_name: metasrv
static_configs:
- targets: ['<metasrv-ip>:<port>']
## `greptimedb-cluster.json`
- job_name: datanode
static_configs:
- targets: ['<datanode0-ip>:<port>', '<datanode1-ip>:<port>', '<datanode2-ip>:<port>']
This cluster dashboard provides a comprehensive view of incoming requests, response statuses, and internal activities such as flush and compaction, with a layered structure from frontend to datanode. Designed with a focus on alert functionality, its primary aim is to highlight any anomalies in metrics, allowing users to quickly pinpoint the cause of errors.
- job_name: frontend
static_configs:
- targets: ['<frontend-ip>:<port>']
```
We use Prometheus to scrape metrics from the nodes of a GreptimeDB cluster and Grafana to visualize them. Any compatible stack should work too.
2. **Configure the data sources in Grafana**
__Note__: This dashboard is still in an early stage of development. Any issues or suggestions for improvement are welcome.
You need to add two data sources in Grafana:
### Configuration
- Prometheus: It is the Prometheus instance that scrapes the GreptimeDB metrics.
- Information Schema: Accessed through the MySQL port of the monitored instance. The dashboard uses this data source to show the information schema of the current instance.
Please ensure the following configuration before importing the dashboard into Grafana.
3. **Import the dashboards based on your deployment scenario**
__1. Prometheus scrape config__
- **Cluster**: Import the `cluster/dashboard.json` dashboard.
- **Standalone**: Import the `standalone/dashboard.json` dashboard.
Configure Prometheus to scrape the cluster.
```yml
# example config
# only to indicate how to assign labels to each target
# modify yours accordingly
scrape_configs:
- job_name: metasrv
static_configs:
- targets: ['<metasrv-ip>:<port>']
- job_name: datanode
static_configs:
- targets: ['<datanode0-ip>:<port>', '<datanode1-ip>:<port>', '<datanode2-ip>:<port>']
- job_name: frontend
static_configs:
- targets: ['<frontend-ip>:<port>']
```
__2. Grafana config__
Create a Prometheus data source in Grafana before using this dashboard. We use `datasource` as a variable in the Grafana dashboard so that multiple environments are supported.
### Usage
Use `datasource` or `instance` in the upper-left corner to filter data from a certain node.

grafana/check.sh Executable file (19 lines added)

@@ -0,0 +1,19 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
# Use jq to check for panels with empty or missing descriptions
invalid_panels=$(cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels[]
| select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))
')
# Check if any invalid panels were found
if [[ -n "$invalid_panels" ]]; then
echo "Error: The following panels have empty or missing descriptions:"
echo "$invalid_panels"
exit 1
else
echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
exit 0
fi
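For local use, a minimal sketch that mirrors the workflow steps above (run from the repository root):
```bash
# Hedged sketch: make the check script executable and run it locally.
chmod +x grafana/check.sh
./grafana/check.sh   # exits non-zero if any stats/timeseries panel lacks a description
```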

File diff suppressed because it is too large


@@ -1,96 +0,0 @@
# Overview
| Title | Query | Type | Description | Unit | Datasource | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Uptime | `time() - process_start_time_seconds` | `stat` | The start time of GreptimeDB. | `s` | `prometheus` | `__auto` |
| Version | `SELECT pkg_version FROM information_schema.build_info` | `stat` | GreptimeDB version. | -- | `mysql` | -- |
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))` | `stat` | Total ingestion rate. | `rowsps` | `prometheus` | `__auto` |
| Total Storage Size | `select SUM(disk_size) from information_schema.region_statistics;` | `stat` | Total number of data file size. | `decbytes` | `mysql` | -- |
| Total Rows | `select SUM(region_rows) from information_schema.region_statistics;` | `stat` | Total number of data rows in the cluster. Calculated by sum of rows from each region. | `sishort` | `mysql` | -- |
| Deployment | `SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';`<br/>`SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';`<br/>`SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';`<br/>`SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';` | `stat` | The deployment topology of GreptimeDB. | -- | `mysql` | -- |
| Database Resources | `SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')`<br/>`SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'`<br/>`SELECT COUNT(region_id) as regions FROM information_schema.region_peers`<br/>`SELECT COUNT(*) as flows FROM information_schema.flows` | `stat` | The number of the key resources in GreptimeDB. | -- | `mysql` | -- |
| Data Size | `SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;`<br/>`SELECT SUM(index_size) as index FROM information_schema.region_statistics;`<br/>`SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;` | `stat` | The data size of wal/index/manifest in the GreptimeDB. | `decbytes` | `mysql` | -- |
# Ingestion
| Title | Query | Type | Description | Unit | Datasource | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `rowsps` | `prometheus` | `ingestion` |
| Ingestion Rate by Type | `sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))`<br/>`sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `rowsps` | `prometheus` | `http-logs` |
# Queries
| Title | Query | Type | Description | Unit | Datasource | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Total Query Rate | `sum (rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_http_promql_elapsed_counte{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Total rate of query API calls by protocol. This metric is collected from frontends.<br/><br/>Here we listed 3 main protocols:<br/>- MySQL<br/>- Postgres<br/>- Prometheus API<br/><br/>Note that there are some other minor query APIs like /sql are not included | `reqps` | `prometheus` | `mysql` |
# Resources
| Title | Query | Type | Description | Unit | Datasource | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Datanode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `decbytes` | `prometheus` | `[{{instance}}]-[{{ pod }}]` |
| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `none` | `prometheus` | `[{{ instance }}]-[{{ pod }}]` |
| Frontend Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `decbytes` | `prometheus` | `[{{ instance }}]-[{{ pod }}]` |
| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `none` | `prometheus` | `[{{ instance }}]-[{{ pod }}]-cpu` |
| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `decbytes` | `prometheus` | `[{{ instance }}]-[{{ pod }}]-resident` |
| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `none` | `prometheus` | `[{{ instance }}]-[{{ pod }}]` |
| Flownode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `decbytes` | `prometheus` | `[{{ instance }}]-[{{ pod }}]` |
| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `none` | `prometheus` | `[{{ instance }}]-[{{ pod }}]` |
# Frontend Requests
| Title | Query | Type | Description | Unit | Datasource | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| HTTP QPS per Instance | `sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{instance=~"$frontend",path!~"/health\|/metrics"}[$__rate_interval]))` | `timeseries` | HTTP QPS per Instance. | `reqps` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]` |
| HTTP P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{instance=~"$frontend",path!~"/health\|/metrics"}[$__rate_interval])))` | `timeseries` | HTTP P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
| gRPC QPS per Instance | `sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | gRPC QPS per Instance. | `reqps` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]` |
| gRPC P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | gRPC P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
| MySQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | MySQL QPS per Instance. | `reqps` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| MySQL P99 per Instance | `histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | MySQL P99 per Instance. | `s` | `prometheus` | `[{{ instance }}]-[{{ pod }}]-p99` |
| PostgreSQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | PostgreSQL QPS per Instance. | `reqps` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| PostgreSQL P99 per Instance | `histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | PostgreSQL P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-p99` |
# Frontend to Datanode
| Title | Query | Type | Description | Unit | Datasource | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Ingest Rows per Instance | `sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Ingestion rate by row as in each frontend | `rowsps` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| Region Call QPS per Instance | `sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Region Call QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
| Region Call P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | Region Call P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
# Mito Engine
| Title | Query | Type | Description | Unit | Datasource | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Request OPS per Instance | `sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Request QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Request P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Write Buffer per Instance | `greptime_mito_write_buffer_bytes{instance=~"$datanode"}` | `timeseries` | Write Buffer per Instance. | `decbytes` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| Write Rows per Instance | `sum by (instance, pod) (rate(greptime_mito_write_rows_total{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Ingestion size by row counts. | `rowsps` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| Flush OPS per Instance | `sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Flush QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{reason}}]` |
| Write Stall per Instance | `sum by(instance, pod) (greptime_mito_write_stall_total{instance=~"$datanode"})` | `timeseries` | Write Stall per Instance. | `decbytes` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| Read Stage OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{instance=~"$datanode", stage="total"}[$__rate_interval]))` | `timeseries` | Read Stage OPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `ops` | `prometheus` | `[{{ instance }}]-[{{pod}}]` |
| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `bytes` | `prometheus` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
| Cached Bytes per Instance | `greptime_mito_cache_bytes{instance=~"$datanode"}` | `timeseries` | Cached Bytes per Instance. | `decbytes` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Inflight Compaction | `greptime_mito_inflight_compaction_count` | `timeseries` | Ongoing compaction task count | `none` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| WAL sync duration seconds | `histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))` | `timeseries` | Raft engine (local disk) log store sync latency, p99 | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-p99` |
| Log Store op duration seconds | `histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))` | `timeseries` | Write-ahead log operations latency at p99 | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99` |
| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `none` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
# OpenDAL
| Title | Query | Type | Description | Unit | Datasource | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode",operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
# Metasrv
| Title | Query | Type | Description | Unit | Datasource | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `state-timeline` | Counter of region migration by source and destination | `none` | `prometheus` | `from-datanode-{{datanode_id}}` |
| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `none` | `prometheus` | `__auto` |
| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `none` | `prometheus` | `__auto` |
# Flownode
| Title | Query | Type | Description | Unit | Datasource | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | -- | `prometheus` | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | -- | `prometheus` | `[{{instance}}]-[{{pod}}]-p95` |
| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | -- | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | -- | `prometheus` | `[{{instance}}]-[{{pod}]` |
| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | -- | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{code}}]` |


@@ -1,761 +0,0 @@
groups:
- title: Overview
panels:
- title: Uptime
type: stat
description: The start time of GreptimeDB.
unit: s
queries:
- expr: time() - process_start_time_seconds
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Version
type: stat
description: GreptimeDB version.
queries:
- expr: SELECT pkg_version FROM information_schema.build_info
datasource:
type: mysql
uid: ${information_schema}
- title: Total Ingestion Rate
type: stat
description: Total ingestion rate.
unit: rowsps
queries:
- expr: sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Total Storage Size
type: stat
description: Total number of data file size.
unit: decbytes
queries:
- expr: select SUM(disk_size) from information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Total Rows
type: stat
description: Total number of data rows in the cluster. Calculated by sum of rows from each region.
unit: sishort
queries:
- expr: select SUM(region_rows) from information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Deployment
type: stat
description: The deployment topology of GreptimeDB.
queries:
- expr: SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';
datasource:
type: mysql
uid: ${information_schema}
- title: Database Resources
type: stat
description: The number of the key resources in GreptimeDB.
queries:
- expr: SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(region_id) as regions FROM information_schema.region_peers
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(*) as flows FROM information_schema.flows
datasource:
type: mysql
uid: ${information_schema}
- title: Data Size
type: stat
description: The data size of wal/index/manifest in the GreptimeDB.
unit: decbytes
queries:
- expr: SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT SUM(index_size) as index FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Ingestion
panels:
- title: Total Ingestion Rate
type: timeseries
description: |
Total ingestion rate.
Here we listed 3 primary protocols:
- Prometheus remote write
- Greptime's gRPC API (when using our ingest SDK)
- Log ingestion http API
unit: rowsps
queries:
- expr: sum(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: ingestion
- title: Ingestion Rate by Type
type: timeseries
description: |
Total ingestion rate.
Here we listed 3 primary protocols:
- Prometheus remote write
- Greptime's gRPC API (when using our ingest SDK)
- Log ingestion http API
unit: rowsps
queries:
- expr: sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: http-logs
- expr: sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: prometheus-remote-write
- title: Queries
panels:
- title: Total Query Rate
type: timeseries
description: |-
Total rate of query API calls by protocol. This metric is collected from frontends.
Here we listed 3 main protocols:
- MySQL
- Postgres
- Prometheus API
Note that some other minor query APIs, such as /sql, are not included
unit: reqps
queries:
- expr: sum (rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: mysql
- expr: sum (rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: pg
- expr: sum (rate(greptime_servers_http_promql_elapsed_counte{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: promql
- title: Resources
panels:
- title: Datanode Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{ pod }}]'
- title: Datanode CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
- title: Metasrv Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
- title: Metasrv CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Flownode Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Flownode CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend Requests
panels:
- title: HTTP QPS per Instance
type: timeseries
description: HTTP QPS per Instance.
unit: reqps
queries:
- expr: sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{instance=~"$frontend",path!~"/health|/metrics"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]'
- title: HTTP P99 per Instance
type: timeseries
description: HTTP P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{instance=~"$frontend",path!~"/health|/metrics"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
- title: gRPC QPS per Instance
type: timeseries
description: gRPC QPS per Instance.
unit: reqps
queries:
- expr: sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]'
- title: gRPC P99 per Instance
type: timeseries
description: gRPC P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
- title: MySQL QPS per Instance
type: timeseries
description: MySQL QPS per Instance.
unit: reqps
queries:
- expr: sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: MySQL P99 per Instance
type: timeseries
description: MySQL P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-p99'
- title: PostgreSQL QPS per Instance
type: timeseries
description: PostgreSQL QPS per Instance.
unit: reqps
queries:
- expr: sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: PostgreSQL P99 per Instance
type: timeseries
description: PostgreSQL P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Frontend to Datanode
panels:
- title: Ingest Rows per Instance
type: timeseries
description: Ingestion rate in rows, as observed on each frontend
unit: rowsps
queries:
- expr: sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Region Call QPS per Instance
type: timeseries
description: Region Call QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
- title: Region Call P99 per Instance
type: timeseries
description: Region Call P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{instance=~"$frontend"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
- title: Mito Engine
panels:
- title: Request OPS per Instance
type: timeseries
description: Request QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Request P99 per Instance
type: timeseries
description: Request P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Write Buffer per Instance
type: timeseries
description: Write Buffer per Instance.
unit: decbytes
queries:
- expr: greptime_mito_write_buffer_bytes{instance=~"$datanode"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Write Rows per Instance
type: timeseries
description: Ingestion rate in rows per instance.
unit: rowsps
queries:
- expr: sum by (instance, pod) (rate(greptime_mito_write_rows_total{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Flush OPS per Instance
type: timeseries
description: Flush QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{reason}}]'
- title: Write Stall per Instance
type: timeseries
description: Write Stall per Instance.
unit: decbytes
queries:
- expr: sum by(instance, pod) (greptime_mito_write_stall_total{instance=~"$datanode"})
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Read Stage OPS per Instance
type: timeseries
description: Read Stage OPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{instance=~"$datanode", stage="total"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Read Stage P99 per Instance
type: timeseries
description: Read Stage P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
- title: Write Stage P99 per Instance
type: timeseries
description: Write Stage P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
- title: Compaction OPS per Instance
type: timeseries
description: Compaction OPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{pod}}]'
- title: Compaction P99 per Instance by Stage
type: timeseries
description: Compaction latency by stage
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
- title: Compaction P99 per Instance
type: timeseries
description: Compaction P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction'
- title: WAL write size
type: timeseries
description: Write-ahead log write size in bytes. This chart includes p95 and p99 write sizes by instance and the total WAL write rate.
unit: bytes
queries:
- expr: histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p95'
- expr: histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p99'
- expr: sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-throughput'
- title: Cached Bytes per Instance
type: timeseries
description: Cached Bytes per Instance.
unit: decbytes
queries:
- expr: greptime_mito_cache_bytes{instance=~"$datanode"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Inflight Compaction
type: timeseries
description: Ongoing compaction task count
unit: none
queries:
- expr: greptime_mito_inflight_compaction_count
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: WAL sync duration seconds
type: timeseries
description: Raft engine (local disk) log store sync latency, p99
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Log Store op duration seconds
type: timeseries
description: Write-ahead log operations latency at p99
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99'
- title: Inflight Flush
type: timeseries
description: Ongoing flush task count
unit: none
queries:
- expr: greptime_mito_inflight_flush_count
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: OpenDAL
panels:
- title: QPS per Instance
type: timeseries
description: QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Read QPS per Instance
type: timeseries
description: Read QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: Read P99 per Instance
type: timeseries
description: Read P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
- title: Write QPS per Instance
type: timeseries
description: Write QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
- title: Write P99 per Instance
type: timeseries
description: Write P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: List QPS per Instance
type: timeseries
description: List QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="list"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: List P99 per Instance
type: timeseries
description: List P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="list"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: Other Requests per Instance
type: timeseries
description: Other Requests per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode",operation!~"read|write|list|stat"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Other Request P99 per Instance
type: timeseries
description: Other Request P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read|write|list"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Opendal traffic
type: timeseries
description: Total traffic in bytes by instance and operation
unit: ops
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Metasrv
panels:
- title: Region migration datanode
type: state-timeline
description: Counter of region migration by source and destination
unit: none
queries:
- expr: greptime_meta_region_migration_stat{datanode_type="src"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: from-datanode-{{datanode_id}}
- expr: greptime_meta_region_migration_stat{datanode_type="desc"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: to-datanode-{{datanode_id}}
- title: Region migration error
type: timeseries
description: Counter of region migration errors
unit: none
queries:
- expr: greptime_meta_region_migration_error
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Datanode load
type: timeseries
description: Gauge of the load of each datanode, collected via heartbeats between the datanode and metasrv. Metasrv uses this information to schedule workloads.
unit: none
queries:
- expr: greptime_datanode_load
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Flownode
panels:
- title: Flow Ingest / Output Rate
type: timeseries
description: Flow Ingest / Output Rate.
queries:
- expr: sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{pod}}]-[{{instance}}]-[{{direction}}]'
- title: Flow Ingest Latency
type: timeseries
description: Flow Ingest Latency.
queries:
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p95'
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Flow Operation Latency
type: timeseries
description: Flow Operation Latency.
queries:
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p95'
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p99'
- title: Flow Buffer Size per Instance
type: timeseries
description: Flow Buffer Size per Instance.
queries:
- expr: greptime_flow_input_buf_size
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Flow Processing Error per Instance
type: timeseries
description: Flow Processing Error per Instance.
queries:
- expr: sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'
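The histogram panels above all follow the same p99 pattern: `histogram_quantile(0.99, ...)` over a `sum by(le, ...)` of `rate(<metric>_bucket[$__rate_interval])`. As a minimal sketch of evaluating one of these expressions outside Grafana (assuming a Prometheus server reachable at http://localhost:9090 and substituting a fixed 5m window for Grafana's $__rate_interval; neither is part of this change), the query can be run against the standard /api/v1/query endpoint:

```python
# Minimal sketch: evaluate one of the p99 expressions above via the
# Prometheus instant-query HTTP API. The Prometheus URL and the fixed 5m
# window (replacing Grafana's $__rate_interval) are assumptions.
import json
import urllib.parse
import urllib.request

PROM_URL = "http://localhost:9090"  # assumed Prometheus address
QUERY = (
    'histogram_quantile(0.99, sum by(le, instance, pod) '
    '(rate(greptime_servers_mysql_query_elapsed_bucket[5m])))'
)

def instant_query(expr: str) -> dict:
    """Run a PromQL instant query and return the decoded JSON body."""
    url = f"{PROM_URL}/api/v1/query?" + urllib.parse.urlencode({"query": expr})
    with urllib.request.urlopen(url) as resp:
        return json.load(resp)

if __name__ == "__main__":
    result = instant_query(QUERY)
    for sample in result.get("data", {}).get("result", []):
        labels = sample.get("metric", {})
        value = sample.get("value", [None, None])[1]
        print(f"{labels.get('instance', '?')}/{labels.get('pod', '?')}: p99={value}s")
```

The same approach works for any of the `_bucket` histograms referenced in these panels; only the expression string changes.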

File diff suppressed because it is too large

View File

@@ -1,96 +0,0 @@
# Overview
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Uptime | `time() - process_start_time_seconds` | `stat` | The start time of GreptimeDB. | `s` | `prometheus` | `__auto` |
| Version | `SELECT pkg_version FROM information_schema.build_info` | `stat` | GreptimeDB version. | -- | `mysql` | -- |
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))` | `stat` | Total ingestion rate. | `rowsps` | `prometheus` | `__auto` |
| Total Storage Size | `select SUM(disk_size) from information_schema.region_statistics;` | `stat` | Total number of data file size. | `decbytes` | `mysql` | -- |
| Total Rows | `select SUM(region_rows) from information_schema.region_statistics;` | `stat` | Total number of data rows in the cluster. Calculated by sum of rows from each region. | `sishort` | `mysql` | -- |
| Deployment | `SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';`<br/>`SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';`<br/>`SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';`<br/>`SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';` | `stat` | The deployment topology of GreptimeDB. | -- | `mysql` | -- |
| Database Resources | `SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')`<br/>`SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'`<br/>`SELECT COUNT(region_id) as regions FROM information_schema.region_peers`<br/>`SELECT COUNT(*) as flows FROM information_schema.flows` | `stat` | The number of the key resources in GreptimeDB. | -- | `mysql` | -- |
| Data Size | `SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;`<br/>`SELECT SUM(index_size) as index FROM information_schema.region_statistics;`<br/>`SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;` | `stat` | The data size of wal/index/manifest in the GreptimeDB. | `decbytes` | `mysql` | -- |
# Ingestion
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `rowsps` | `prometheus` | `ingestion` |
| Ingestion Rate by Type | `sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))`<br/>`sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `rowsps` | `prometheus` | `http-logs` |
# Queries
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Total Query Rate | `sum (rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_http_promql_elapsed_counte{}[$__rate_interval]))` | `timeseries` | Total rate of query API calls by protocol. This metric is collected from frontends.<br/><br/>Here we listed 3 main protocols:<br/>- MySQL<br/>- Postgres<br/>- Prometheus API<br/><br/>Note that there are some other minor query APIs like /sql are not included | `reqps` | `prometheus` | `mysql` |
# Resources
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Datanode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `decbytes` | `prometheus` | `[{{instance}}]-[{{ pod }}]` |
| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `none` | `prometheus` | `[{{ instance }}]-[{{ pod }}]` |
| Frontend Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `decbytes` | `prometheus` | `[{{ instance }}]-[{{ pod }}]` |
| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `none` | `prometheus` | `[{{ instance }}]-[{{ pod }}]-cpu` |
| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `decbytes` | `prometheus` | `[{{ instance }}]-[{{ pod }}]-resident` |
| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `none` | `prometheus` | `[{{ instance }}]-[{{ pod }}]` |
| Flownode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `decbytes` | `prometheus` | `[{{ instance }}]-[{{ pod }}]` |
| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `none` | `prometheus` | `[{{ instance }}]-[{{ pod }}]` |
# Frontend Requests
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| HTTP QPS per Instance | `sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{path!~"/health\|/metrics"}[$__rate_interval]))` | `timeseries` | HTTP QPS per Instance. | `reqps` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]` |
| HTTP P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{path!~"/health\|/metrics"}[$__rate_interval])))` | `timeseries` | HTTP P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
| gRPC QPS per Instance | `sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{}[$__rate_interval]))` | `timeseries` | gRPC QPS per Instance. | `reqps` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]` |
| gRPC P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | gRPC P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
| MySQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))` | `timeseries` | MySQL QPS per Instance. | `reqps` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| MySQL P99 per Instance | `histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | MySQL P99 per Instance. | `s` | `prometheus` | `[{{ instance }}]-[{{ pod }}]-p99` |
| PostgreSQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))` | `timeseries` | PostgreSQL QPS per Instance. | `reqps` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| PostgreSQL P99 per Instance | `histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | PostgreSQL P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-p99` |
# Frontend to Datanode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Ingest Rows per Instance | `sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))` | `timeseries` | Ingestion rate by row as in each frontend | `rowsps` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| Region Call QPS per Instance | `sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{}[$__rate_interval]))` | `timeseries` | Region Call QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
| Region Call P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{}[$__rate_interval])))` | `timeseries` | Region Call P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
# Mito Engine
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Request OPS per Instance | `sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{}[$__rate_interval]))` | `timeseries` | Request QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Request P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Write Buffer per Instance | `greptime_mito_write_buffer_bytes{}` | `timeseries` | Write Buffer per Instance. | `decbytes` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| Write Rows per Instance | `sum by (instance, pod) (rate(greptime_mito_write_rows_total{}[$__rate_interval]))` | `timeseries` | Ingestion size by row counts. | `rowsps` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| Flush OPS per Instance | `sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{}[$__rate_interval]))` | `timeseries` | Flush QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{reason}}]` |
| Write Stall per Instance | `sum by(instance, pod) (greptime_mito_write_stall_total{})` | `timeseries` | Write Stall per Instance. | `decbytes` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| Read Stage OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{ stage="total"}[$__rate_interval]))` | `timeseries` | Read Stage OPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `ops` | `prometheus` | `[{{ instance }}]-[{{pod}}]` |
| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `bytes` | `prometheus` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
| Cached Bytes per Instance | `greptime_mito_cache_bytes{}` | `timeseries` | Cached Bytes per Instance. | `decbytes` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Inflight Compaction | `greptime_mito_inflight_compaction_count` | `timeseries` | Ongoing compaction task count | `none` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
| WAL sync duration seconds | `histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))` | `timeseries` | Raft engine (local disk) log store sync latency, p99 | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-p99` |
| Log Store op duration seconds | `histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))` | `timeseries` | Write-ahead log operations latency at p99 | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99` |
| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `none` | `prometheus` | `[{{instance}}]-[{{pod}}]` |
# OpenDAL
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `s` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `ops` | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
# Metasrv
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `state-timeline` | Counter of region migration by source and destination | `none` | `prometheus` | `from-datanode-{{datanode_id}}` |
| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `none` | `prometheus` | `__auto` |
| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `none` | `prometheus` | `__auto` |
# Flownode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | -- | `prometheus` | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | -- | `prometheus` | `[{{instance}}]-[{{pod}}]-p95` |
| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | -- | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | -- | `prometheus` | `[{{instance}}]-[{{pod}]` |
| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | -- | `prometheus` | `[{{instance}}]-[{{pod}}]-[{{code}}]` |

View File

@@ -1,761 +0,0 @@
groups:
- title: Overview
panels:
- title: Uptime
type: stat
description: The start time of GreptimeDB.
unit: s
queries:
- expr: time() - process_start_time_seconds
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Version
type: stat
description: GreptimeDB version.
queries:
- expr: SELECT pkg_version FROM information_schema.build_info
datasource:
type: mysql
uid: ${information_schema}
- title: Total Ingestion Rate
type: stat
description: Total ingestion rate.
unit: rowsps
queries:
- expr: sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Total Storage Size
type: stat
description: Total number of data file size.
unit: decbytes
queries:
- expr: select SUM(disk_size) from information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Total Rows
type: stat
description: Total number of data rows in the cluster. Calculated by sum of rows from each region.
unit: sishort
queries:
- expr: select SUM(region_rows) from information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Deployment
type: stat
description: The deployment topology of GreptimeDB.
queries:
- expr: SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';
datasource:
type: mysql
uid: ${information_schema}
- title: Database Resources
type: stat
description: The number of the key resources in GreptimeDB.
queries:
- expr: SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(region_id) as regions FROM information_schema.region_peers
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(*) as flows FROM information_schema.flows
datasource:
type: mysql
uid: ${information_schema}
- title: Data Size
type: stat
description: The data size of wal/index/manifest in the GreptimeDB.
unit: decbytes
queries:
- expr: SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT SUM(index_size) as index FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Ingestion
panels:
- title: Total Ingestion Rate
type: timeseries
description: |
Total ingestion rate.
Here we listed 3 primary protocols:
- Prometheus remote write
- Greptime's gRPC API (when using our ingest SDK)
- Log ingestion http API
unit: rowsps
queries:
- expr: sum(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: ingestion
- title: Ingestion Rate by Type
type: timeseries
description: |
Total ingestion rate.
Here we listed 3 primary protocols:
- Prometheus remote write
- Greptime's gRPC API (when using our ingest SDK)
- Log ingestion http API
unit: rowsps
queries:
- expr: sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: http-logs
- expr: sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: prometheus-remote-write
- title: Queries
panels:
- title: Total Query Rate
type: timeseries
description: |-
Total rate of query API calls by protocol. This metric is collected from frontends.
Here we listed 3 main protocols:
- MySQL
- Postgres
- Prometheus API
Note that there are some other minor query APIs like /sql are not included
unit: reqps
queries:
- expr: sum (rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: mysql
- expr: sum (rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: pg
- expr: sum (rate(greptime_servers_http_promql_elapsed_counte{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: promql
- title: Resources
panels:
- title: Datanode Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{ pod }}]'
- title: Datanode CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
- title: Metasrv Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
- title: Metasrv CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Flownode Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Flownode CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend Requests
panels:
- title: HTTP QPS per Instance
type: timeseries
description: HTTP QPS per Instance.
unit: reqps
queries:
- expr: sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{path!~"/health|/metrics"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]'
- title: HTTP P99 per Instance
type: timeseries
description: HTTP P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{path!~"/health|/metrics"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
- title: gRPC QPS per Instance
type: timeseries
description: gRPC QPS per Instance.
unit: reqps
queries:
- expr: sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]'
- title: gRPC P99 per Instance
type: timeseries
description: gRPC P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
- title: MySQL QPS per Instance
type: timeseries
description: MySQL QPS per Instance.
unit: reqps
queries:
- expr: sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: MySQL P99 per Instance
type: timeseries
description: MySQL P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-p99'
- title: PostgreSQL QPS per Instance
type: timeseries
description: PostgreSQL QPS per Instance.
unit: reqps
queries:
- expr: sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: PostgreSQL P99 per Instance
type: timeseries
description: PostgreSQL P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Frontend to Datanode
panels:
- title: Ingest Rows per Instance
type: timeseries
description: Ingestion rate by row as in each frontend
unit: rowsps
queries:
- expr: sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Region Call QPS per Instance
type: timeseries
description: Region Call QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
- title: Region Call P99 per Instance
type: timeseries
description: Region Call P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
- title: Mito Engine
panels:
- title: Request OPS per Instance
type: timeseries
description: Request QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Request P99 per Instance
type: timeseries
description: Request P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Write Buffer per Instance
type: timeseries
description: Write Buffer per Instance.
unit: decbytes
queries:
- expr: greptime_mito_write_buffer_bytes{}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Write Rows per Instance
type: timeseries
description: Ingestion size by row counts.
unit: rowsps
queries:
- expr: sum by (instance, pod) (rate(greptime_mito_write_rows_total{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Flush OPS per Instance
type: timeseries
description: Flush QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{reason}}]'
- title: Write Stall per Instance
type: timeseries
description: Write Stall per Instance.
unit: decbytes
queries:
- expr: sum by(instance, pod) (greptime_mito_write_stall_total{})
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Read Stage OPS per Instance
type: timeseries
description: Read Stage OPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{ stage="total"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Read Stage P99 per Instance
type: timeseries
description: Read Stage P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
- title: Write Stage P99 per Instance
type: timeseries
description: Write Stage P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
- title: Compaction OPS per Instance
type: timeseries
description: Compaction OPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{pod}}]'
- title: Compaction P99 per Instance by Stage
type: timeseries
description: Compaction latency by stage
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
- title: Compaction P99 per Instance
type: timeseries
description: Compaction P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction'
- title: WAL write size
type: timeseries
description: Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate.
unit: bytes
queries:
- expr: histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p95'
- expr: histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p99'
- expr: sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-throughput'
- title: Cached Bytes per Instance
type: timeseries
description: Cached Bytes per Instance.
unit: decbytes
queries:
- expr: greptime_mito_cache_bytes{}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Inflight Compaction
type: timeseries
description: Ongoing compaction task count
unit: none
queries:
- expr: greptime_mito_inflight_compaction_count
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: WAL sync duration seconds
type: timeseries
description: Raft engine (local disk) log store sync latency, p99
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Log Store op duration seconds
type: timeseries
description: Write-ahead log operations latency at p99
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99'
- title: Inflight Flush
type: timeseries
description: Ongoing flush task count
unit: none
queries:
- expr: greptime_mito_inflight_flush_count
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: OpenDAL
panels:
- title: QPS per Instance
type: timeseries
description: QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Read QPS per Instance
type: timeseries
description: Read QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: Read P99 per Instance
type: timeseries
description: Read P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
- title: Write QPS per Instance
type: timeseries
description: Write QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
- title: Write P99 per Instance
type: timeseries
description: Write P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: List QPS per Instance
type: timeseries
description: List QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="list"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: List P99 per Instance
type: timeseries
description: List P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="list"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: Other Requests per Instance
type: timeseries
description: Other Requests per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{operation!~"read|write|list|stat"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Other Request P99 per Instance
type: timeseries
description: Other Request P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read|write|list"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Opendal traffic
type: timeseries
description: Total traffic in bytes by instance and operation
unit: Bps
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Metasrv
panels:
- title: Region migration datanode
type: state-timeline
description: Counter of region migration by source and destination
unit: none
queries:
- expr: greptime_meta_region_migration_stat{datanode_type="src"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: from-datanode-{{datanode_id}}
- expr: greptime_meta_region_migration_stat{datanode_type="desc"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: to-datanode-{{datanode_id}}
- title: Region migration error
type: timeseries
description: Counter of region migration error
unit: none
queries:
- expr: greptime_meta_region_migration_error
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Datanode load
type: timeseries
description: Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads.
unit: none
queries:
- expr: greptime_datanode_load
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Flownode
panels:
- title: Flow Ingest / Output Rate
type: timeseries
description: Flow Ingest / Output Rate.
queries:
- expr: sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{pod}}]-[{{instance}}]-[{{direction}}]'
- title: Flow Ingest Latency
type: timeseries
description: Flow Ingest Latency.
queries:
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p95'
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Flow Operation Latency
type: timeseries
description: Flow Operation Latency.
queries:
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p95'
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p99'
- title: Flow Buffer Size per Instance
type: timeseries
description: Flow Buffer Size per Instance.
queries:
- expr: greptime_flow_input_buf_size
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Flow Processing Error per Instance
type: timeseries
description: Flow Processing Error per Instance.
queries:
- expr: sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'

File diff suppressed because it is too large

grafana/greptimedb.json (Normal file, 4159 lines)

File diff suppressed because it is too large

View File

@@ -1,54 +0,0 @@
#!/usr/bin/env bash
DASHBOARD_DIR=${1:-grafana/dashboards}
check_dashboard_description() {
for dashboard in $(find $DASHBOARD_DIR -name "*.json"); do
echo "Checking $dashboard description"
# Use jq to check for panels with empty or missing descriptions
invalid_panels=$(cat $dashboard | jq -r '
.panels[]
| select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))')
# Check if any invalid panels were found
if [[ -n "$invalid_panels" ]]; then
echo "Error: The following panels have empty or missing descriptions:"
echo "$invalid_panels"
exit 1
else
echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
fi
done
}
check_dashboards_generation() {
./grafana/scripts/gen-dashboards.sh
if [[ -n "$(git diff --name-only grafana/dashboards)" ]]; then
echo "Error: The dashboards are not generated correctly. You should execute the `make dashboards` command."
exit 1
fi
}
check_datasource() {
for dashboard in $(find $DASHBOARD_DIR -name "*.json"); do
echo "Checking $dashboard datasource"
jq -r '.panels[] | select(.type != "row") | .targets[] | [.datasource.type, .datasource.uid] | @tsv' $dashboard | while read -r type uid; do
# if the datasource is prometheus, check if the uid is ${metrics}
if [[ "$type" == "prometheus" && "$uid" != "\${metrics}" ]]; then
echo "Error: The datasource uid of $dashboard is not valid. It should be \${metrics}, got $uid"
exit 1
fi
# if the datasource is mysql, check if the uid is ${information_schema}
if [[ "$type" == "mysql" && "$uid" != "\${information_schema}" ]]; then
echo "Error: The datasource uid of $dashboard is not valid. It should be \${information_schema}, got $uid"
exit 1
fi
done
done
}
check_dashboards_generation
check_dashboard_description
check_datasource

View File

@@ -1,18 +0,0 @@
#! /usr/bin/env bash
CLUSTER_DASHBOARD_DIR=${1:-grafana/dashboards/cluster}
STANDALONE_DASHBOARD_DIR=${2:-grafana/dashboards/standalone}
DAC_IMAGE=ghcr.io/zyy17/dac:20250422-c9435ce
remove_instance_filters() {
# Remove the instance filters for the standalone dashboards.
sed 's/instance=~\\"$datanode\\",//; s/instance=~\\"$datanode\\"//; s/instance=~\\"$frontend\\",//; s/instance=~\\"$frontend\\"//; s/instance=~\\"$metasrv\\",//; s/instance=~\\"$metasrv\\"//; s/instance=~\\"$flownode\\",//; s/instance=~\\"$flownode\\"//;' $CLUSTER_DASHBOARD_DIR/dashboard.json > $STANDALONE_DASHBOARD_DIR/dashboard.json
}
generate_intermediate_dashboards_and_docs() {
docker run -v ${PWD}:/greptimedb --rm ${DAC_IMAGE} -i /greptimedb/$CLUSTER_DASHBOARD_DIR/dashboard.json -o /greptimedb/$CLUSTER_DASHBOARD_DIR/dashboard.yaml -m > $CLUSTER_DASHBOARD_DIR/dashboard.md
docker run -v ${PWD}:/greptimedb --rm ${DAC_IMAGE} -i /greptimedb/$STANDALONE_DASHBOARD_DIR/dashboard.json -o /greptimedb/$STANDALONE_DASHBOARD_DIR/dashboard.yaml -m > $STANDALONE_DASHBOARD_DIR/dashboard.md
}
remove_instance_filters
generate_intermediate_dashboards_and_docs

grafana/summary.sh (Executable file, 11 lines)
View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
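# Print a Markdown table of title, description and query expressions (expr or rawSql) for every stat/timeseries panel.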
echo '| Title | Description | Expressions |
|---|---|---|'
cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels |
map(select(.type == "stat" or .type == "timeseries")) |
.[] | "| \(.title) | \(.description | gsub("\n"; "<br>")) | \(.targets | map(.expr // .rawSql | "`\(.|gsub("\n"; "<br>"))`") | join("<br>")) |"
'

View File

@@ -514,7 +514,6 @@ fn query_request_type(request: &QueryRequest) -> &'static str {
Some(Query::Sql(_)) => "query.sql",
Some(Query::LogicalPlan(_)) => "query.logical_plan",
Some(Query::PromRangeQuery(_)) => "query.prom_range",
Some(Query::InsertIntoPlan(_)) => "query.insert_into_plan",
None => "query.empty",
}
}

View File

@@ -27,7 +27,7 @@ use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableType;
use table::table::adapter::DfTableProviderAdapter;
pub mod dummy_catalog;
mod dummy_catalog;
use dummy_catalog::DummyCatalogList;
use table::TableRef;

View File

@@ -78,6 +78,13 @@ pub enum Error {
source: datanode::error::Error,
},
#[snafu(display("Failed to build object storage manager"))]
BuildObjectStorageManager {
#[snafu(implicit)]
location: Location,
source: datanode::error::Error,
},
#[snafu(display("Failed to shutdown datanode"))]
ShutdownDatanode {
#[snafu(implicit)]
@@ -328,6 +335,8 @@ impl ErrorExt for Error {
source.status_code()
}
Error::BuildObjectStorageManager { source, .. } => source.status_code(),
Error::MissingConfig { .. }
| Error::LoadLayeredConfig { .. }
| Error::IllegalConfig { .. }

View File

@@ -345,7 +345,7 @@ impl StartCommand {
let client = Arc::new(NodeClients::new(channel_config));
let invoker = FrontendInvoker::build_from(
flownode.flow_engine().streaming_engine(),
flownode.flow_worker_manager().clone(),
catalog_manager.clone(),
cached_meta_backend.clone(),
layered_cache_registry.clone(),
@@ -355,9 +355,7 @@ impl StartCommand {
.await
.context(StartFlownodeSnafu)?;
flownode
.flow_engine()
.streaming_engine()
// TODO(discord9): refactor and avoid circular reference
.flow_worker_manager()
.set_frontend_invoker(invoker)
.await;

View File

@@ -44,6 +44,7 @@ use common_meta::peer::Peer;
use common_meta::region_keeper::MemoryRegionKeeper;
use common_meta::region_registry::LeaderRegionRegistry;
use common_meta::sequence::SequenceBuilder;
use common_meta::snapshot::MetadataSnapshotManager;
use common_meta::wal_options_allocator::{build_wal_options_allocator, WalOptionsAllocatorRef};
use common_procedure::{ProcedureInfo, ProcedureManagerRef};
use common_telemetry::info;
@@ -56,8 +57,8 @@ use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig;
use flow::{
FlowConfig, FlowStreamingEngine, FlownodeBuilder, FlownodeInstance, FlownodeOptions,
FrontendClient, FrontendInvoker, GrpcQueryHandlerWithBoxedError,
FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeInstance, FlownodeOptions,
FrontendClient, FrontendInvoker,
};
use frontend::frontend::{Frontend, FrontendOptions};
use frontend::instance::builder::FrontendBuilder;
@@ -497,6 +498,10 @@ impl StartCommand {
.build(),
);
let object_store_manager = DatanodeBuilder::build_object_store_manager(&dn_opts.storage)
.await
.context(error::BuildObjectStorageManagerSnafu)?;
let datanode = DatanodeBuilder::new(dn_opts, plugins.clone(), Mode::Standalone)
.with_kv_backend(kv_backend.clone())
.with_cache_registry(layered_cache_registry.clone())
@@ -524,17 +529,17 @@ impl StartCommand {
..Default::default()
};
// for standalone not use grpc, but get a handler to frontend grpc client without
// TODO(discord9): for standalone not use grpc, but just somehow get a handler to frontend grpc client without
// actually make a connection
let (frontend_client, frontend_instance_handler) =
FrontendClient::from_empty_grpc_handler();
let fe_server_addr = fe_opts.grpc.bind_addr.clone();
let frontend_client = FrontendClient::from_static_grpc_addr(fe_server_addr);
let flow_builder = FlownodeBuilder::new(
flownode_options,
plugins.clone(),
table_metadata_manager.clone(),
catalog_manager.clone(),
flow_metadata_manager.clone(),
Arc::new(frontend_client.clone()),
Arc::new(frontend_client),
);
let flownode = flow_builder
.build()
@@ -544,15 +549,15 @@ impl StartCommand {
// set the ref to query for the local flow state
{
let flow_worker_manager = flownode.flow_engine().streaming_engine();
let flow_worker_manager = flownode.flow_worker_manager();
information_extension
.set_flow_worker_manager(flow_worker_manager)
.set_flow_worker_manager(flow_worker_manager.clone())
.await;
}
let node_manager = Arc::new(StandaloneDatanodeManager {
region_server: datanode.region_server(),
flow_server: flownode.flow_engine(),
flow_server: flownode.flow_worker_manager(),
});
let table_id_sequence = Arc::new(
@@ -591,6 +596,11 @@ impl StartCommand {
)
.await?;
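// Back the metadata snapshot manager with the metadata kv store and the datanode's default object store.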
let metadata_snapshot_manager = MetadataSnapshotManager::new(
kv_backend.clone(),
object_store_manager.default_object_store().clone(),
);
let fe_instance = FrontendBuilder::new(
fe_opts.clone(),
kv_backend.clone(),
@@ -601,21 +611,13 @@ impl StartCommand {
StatementStatistics::new(opts.logging.slow_query.clone()),
)
.with_plugin(plugins.clone())
.with_metadata_snapshot_manager(metadata_snapshot_manager)
.try_build()
.await
.context(error::StartFrontendSnafu)?;
let fe_instance = Arc::new(fe_instance);
// set the frontend client for flownode
let grpc_handler = fe_instance.clone() as Arc<dyn GrpcQueryHandlerWithBoxedError>;
let weak_grpc_handler = Arc::downgrade(&grpc_handler);
frontend_instance_handler
.lock()
.unwrap()
.replace(weak_grpc_handler);
// set the frontend invoker for flownode
let flow_worker_manager = flownode.flow_engine().streaming_engine();
let flow_worker_manager = flownode.flow_worker_manager();
// flow server need to be able to use frontend to write insert requests back
let invoker = FrontendInvoker::build_from(
flow_worker_manager.clone(),
@@ -703,7 +705,7 @@ pub struct StandaloneInformationExtension {
region_server: RegionServer,
procedure_manager: ProcedureManagerRef,
start_time_ms: u64,
flow_worker_manager: RwLock<Option<Arc<FlowStreamingEngine>>>,
flow_worker_manager: RwLock<Option<Arc<FlowWorkerManager>>>,
}
impl StandaloneInformationExtension {
@@ -717,7 +719,7 @@ impl StandaloneInformationExtension {
}
/// Set the flow worker manager for the standalone instance.
pub async fn set_flow_worker_manager(&self, flow_worker_manager: Arc<FlowStreamingEngine>) {
pub async fn set_flow_worker_manager(&self, flow_worker_manager: Arc<FlowWorkerManager>) {
let mut guard = self.flow_worker_manager.write().await;
*guard = Some(flow_worker_manager);
}

View File

@@ -31,8 +31,7 @@ impl Plugins {
}
pub fn insert<T: 'static + Send + Sync>(&self, value: T) {
let last = self.write().insert(value);
assert!(last.is_none(), "each type of plugins must be one and only");
let _ = self.write().insert(value);
}
pub fn get<T: 'static + Send + Sync + Clone>(&self) -> Option<T> {
@@ -138,12 +137,4 @@ mod tests {
assert_eq!(plugins.len(), 2);
assert!(!plugins.is_empty());
}
#[test]
#[should_panic(expected = "each type of plugins must be one and only")]
fn test_plugin_uniqueness() {
let plugins = Plugins::new();
plugins.insert(1i32);
plugins.insert(2i32);
}
}

View File

@@ -15,6 +15,7 @@
mod add_region_follower;
mod flush_compact_region;
mod flush_compact_table;
mod metadata_snaphost;
mod migrate_region;
mod remove_region_follower;
@@ -23,6 +24,7 @@ use std::sync::Arc;
use add_region_follower::AddRegionFollowerFunction;
use flush_compact_region::{CompactRegionFunction, FlushRegionFunction};
use flush_compact_table::{CompactTableFunction, FlushTableFunction};
use metadata_snaphost::{DumpMetadataFunction, RestoreMetadataFunction};
use migrate_region::MigrateRegionFunction;
use remove_region_follower::RemoveRegionFollowerFunction;
@@ -43,5 +45,7 @@ impl AdminFunction {
registry.register_async(Arc::new(FlushTableFunction));
registry.register_async(Arc::new(CompactTableFunction));
registry.register_async(Arc::new(FlushFlowFunction));
registry.register_async(Arc::new(DumpMetadataFunction));
registry.register_async(Arc::new(RestoreMetadataFunction));
}
}

View File

@@ -0,0 +1,56 @@
use common_macro::admin_fn;
use common_query::error::{MissingMetadataSnapshotHandlerSnafu, Result};
use common_query::prelude::{Signature, Volatility};
use datatypes::prelude::*;
use session::context::QueryContextRef;
use crate::handlers::MetadataSnapshotHandlerRef;
const METADATA_DIR: &str = "/snaphost/";
const METADATA_FILE_NAME: &str = "dump_metadata";
const METADATA_FILE_EXTENSION: &str = "metadata.fb";
#[admin_fn(
name = DumpMetadataFunction,
display_name = dump_metadata,
sig_fn = dump_signature,
ret = string
)]
pub(crate) async fn dump_metadata(
metadata_snapshot_handler: &MetadataSnapshotHandlerRef,
_query_ctx: &QueryContextRef,
_params: &[ValueRef<'_>],
) -> Result<Value> {
let filename = metadata_snapshot_handler
.dump(METADATA_DIR, METADATA_FILE_NAME)
.await?;
Ok(Value::from(filename))
}
fn dump_signature() -> Signature {
Signature::uniform(0, vec![], Volatility::Immutable)
}
#[admin_fn(
name = RestoreMetadataFunction,
display_name = restore_metadata,
sig_fn = restore_signature,
ret = uint64,
)]
pub(crate) async fn restore_metadata(
metadata_snapshot_handler: &MetadataSnapshotHandlerRef,
_query_ctx: &QueryContextRef,
_params: &[ValueRef<'_>],
) -> Result<Value> {
let num_keyvalues = metadata_snapshot_handler
.restore(
METADATA_DIR,
&format!("{METADATA_FILE_NAME}.{METADATA_FILE_EXTENSION}"),
)
.await?;
Ok(Value::from(num_keyvalues))
}
fn restore_signature() -> Signature {
Signature::uniform(0, vec![], Volatility::Immutable)
}
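For reference, both functions above always address the same object store location: METADATA_DIR joined with METADATA_FILE_NAME plus the extension, i.e. /snaphost/dump_metadata.metadata.fb. A tiny illustrative check of that composition, not part of this change and assuming it sits next to the constants above:
#[cfg(test)]
mod metadata_file_name_tests {
    use super::*;

    #[test]
    fn composes_expected_snapshot_file_name() {
        // The file written by dump_metadata and later read back by restore_metadata.
        assert_eq!(
            format!("{METADATA_FILE_NAME}.{METADATA_FILE_EXTENSION}"),
            "dump_metadata.metadata.fb"
        );
    }
}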

View File

@@ -89,8 +89,18 @@ pub trait FlowServiceHandler: Send + Sync {
) -> Result<api::v1::flow::FlowResponse>;
}
/// This metadata snapshot handler is only used for dumping and restoring metadata for now.
#[async_trait]
pub trait MetadataSnapshotHandler: Send + Sync {
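/// Dumps all metadata into the given directory under the given file name and returns the name of the written snapshot file.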
async fn dump(&self, path: &str, filename: &str) -> Result<String>;
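/// Restores metadata from the given file and returns the number of restored key-value pairs.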
async fn restore(&self, path: &str, filename: &str) -> Result<u64>;
}
pub type TableMutationHandlerRef = Arc<dyn TableMutationHandler>;
pub type ProcedureServiceHandlerRef = Arc<dyn ProcedureServiceHandler>;
pub type FlowServiceHandlerRef = Arc<dyn FlowServiceHandler>;
pub type MetadataSnapshotHandlerRef = Arc<dyn MetadataSnapshotHandler>;
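As a minimal sketch, not part of this change: a concrete handler could simply delegate to the MetadataSnapshotManager added in common-meta further below. The struct name, the exact import path of the trait and the error mapping through MetadataSnapshotSnafu are assumptions here.
use std::sync::Arc;

use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_function::handlers::MetadataSnapshotHandler;
use common_meta::snapshot::MetadataSnapshotManager;
use common_query::error::{MetadataSnapshotSnafu, Result};
use snafu::ResultExt;

pub struct MetadataSnapshotHandlerImpl {
    manager: Arc<MetadataSnapshotManager>,
}

#[async_trait]
impl MetadataSnapshotHandler for MetadataSnapshotHandlerImpl {
    async fn dump(&self, path: &str, filename: &str) -> Result<String> {
        // MetadataSnapshotManager::dump returns (file name, key-value count);
        // only the file name is surfaced to the admin function.
        let (file_name, _count) = self
            .manager
            .dump(path, filename)
            .await
            .map_err(BoxedError::new)
            .context(MetadataSnapshotSnafu)?;
        Ok(file_name)
    }

    async fn restore(&self, path: &str, filename: &str) -> Result<u64> {
        // The manager expects a full file path, so join the directory and file name here.
        let file_path = format!("{}/{}", path.trim_end_matches('/'), filename);
        self.manager
            .restore(&file_path)
            .await
            .map_err(BoxedError::new)
            .context(MetadataSnapshotSnafu)
    }
}
An Arc of such an implementation would then populate the metadata_snapshot_handler field of FunctionState shown further below.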

View File

@@ -12,7 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::handlers::{FlowServiceHandlerRef, ProcedureServiceHandlerRef, TableMutationHandlerRef};
use crate::handlers::{
FlowServiceHandlerRef, MetadataSnapshotHandlerRef, ProcedureServiceHandlerRef,
TableMutationHandlerRef,
};
/// Shared state for SQL functions.
/// The handlers in state may be `None` in cli command-line or test cases.
@@ -24,6 +27,8 @@ pub struct FunctionState {
pub procedure_service_handler: Option<ProcedureServiceHandlerRef>,
// The flownode handler
pub flow_service_handler: Option<FlowServiceHandlerRef>,
// The metadata snapshot handler
pub metadata_snapshot_handler: Option<MetadataSnapshotHandlerRef>,
}
impl FunctionState {
@@ -48,10 +53,14 @@ impl FunctionState {
CompactTableRequest, DeleteRequest, FlushTableRequest, InsertRequest,
};
use crate::handlers::{FlowServiceHandler, ProcedureServiceHandler, TableMutationHandler};
use crate::handlers::{
FlowServiceHandler, MetadataSnapshotHandler, ProcedureServiceHandler,
TableMutationHandler,
};
struct MockProcedureServiceHandler;
struct MockTableMutationHandler;
struct MockFlowServiceHandler;
struct MockMetadataServiceHandler;
const ROWS: usize = 42;
#[async_trait]
@@ -150,10 +159,22 @@ impl FunctionState {
}
}
#[async_trait]
impl MetadataSnapshotHandler for MockMetadataServiceHandler {
async fn dump(&self, _path: &str, _filename: &str) -> Result<String> {
Ok("test_filename".to_string())
}
async fn restore(&self, _path: &str, _filename: &str) -> Result<u64> {
Ok(100)
}
}
Self {
table_mutation_handler: Some(Arc::new(MockTableMutationHandler)),
procedure_service_handler: Some(Arc::new(MockProcedureServiceHandler)),
flow_service_handler: Some(Arc::new(MockFlowServiceHandler)),
metadata_snapshot_handler: Some(Arc::new(MockMetadataServiceHandler)),
}
}
}

View File

@@ -179,6 +179,10 @@ fn build_struct(
Ident::new("flow_service_handler", handler_type.span()),
Ident::new("MissingFlowServiceHandlerSnafu", handler_type.span()),
),
"MetadataSnapshotHandlerRef" => (
Ident::new("metadata_snapshot_handler", handler_type.span()),
Ident::new("MissingMetadataSnapshotHandlerSnafu", handler_type.span()),
),
handler => ok!(error!(
handler_type.span(),
format!("Unknown handler type: {handler}")

View File

@@ -41,6 +41,7 @@ deadpool = { workspace = true, optional = true }
deadpool-postgres = { workspace = true, optional = true }
derive_builder.workspace = true
etcd-client.workspace = true
flexbuffers = "25.2"
futures.workspace = true
futures-util.workspace = true
hex.workspace = true
@@ -48,6 +49,7 @@ humantime-serde.workspace = true
itertools.workspace = true
lazy_static.workspace = true
moka.workspace = true
object-store.workspace = true
prometheus.workspace = true
prost.workspace = true
rand.workspace = true
@@ -70,6 +72,7 @@ typetag.workspace = true
[dev-dependencies]
chrono.workspace = true
common-procedure = { workspace = true, features = ["testing"] }
common-test-util.workspace = true
common-wal = { workspace = true, features = ["testing"] }
datatypes.workspace = true
hyper = { version = "0.14", features = ["full"] }

View File

@@ -38,7 +38,7 @@ use table::metadata::TableId;
use crate::cache_invalidator::Context;
use crate::ddl::utils::{add_peer_context_if_needed, handle_retry_error};
use crate::ddl::DdlContext;
use crate::error::{self, Result, UnexpectedSnafu};
use crate::error::{self, Result};
use crate::instruction::{CacheIdent, CreateFlow};
use crate::key::flow::flow_info::FlowInfoValue;
use crate::key::flow::flow_route::FlowRouteValue;
@@ -171,7 +171,7 @@ impl CreateFlowProcedure {
}
self.data.state = CreateFlowState::CreateFlows;
// determine flow type
self.data.flow_type = Some(get_flow_type_from_options(&self.data.task)?);
self.data.flow_type = Some(determine_flow_type(&self.data.task));
Ok(Status::executing(true))
}
@@ -196,8 +196,8 @@ impl CreateFlowProcedure {
});
}
info!(
"Creating flow({:?}, type={:?}) on flownodes with peers={:?}",
self.data.flow_id, self.data.flow_type, self.data.peers
"Creating flow({:?}) on flownodes with peers={:?}",
self.data.flow_id, self.data.peers
);
join_all(create_flow)
.await
@@ -306,20 +306,8 @@ impl Procedure for CreateFlowProcedure {
}
}
pub fn get_flow_type_from_options(flow_task: &CreateFlowTask) -> Result<FlowType> {
let flow_type = flow_task
.flow_options
.get(FlowType::FLOW_TYPE_KEY)
.map(|s| s.as_str());
match flow_type {
Some(FlowType::BATCHING) => Ok(FlowType::Batching),
Some(FlowType::STREAMING) => Ok(FlowType::Streaming),
Some(unknown) => UnexpectedSnafu {
err_msg: format!("Unknown flow type: {}", unknown),
}
.fail(),
None => Ok(FlowType::Batching),
}
pub fn determine_flow_type(_flow_task: &CreateFlowTask) -> FlowType {
FlowType::Batching
}
/// The state of [CreateFlowProcedure].

View File

@@ -46,7 +46,7 @@ pub(crate) fn test_create_flow_task(
create_if_not_exists,
expire_after: Some(300),
comment: "".to_string(),
sql: "select 1".to_string(),
sql: "raw_sql".to_string(),
flow_options: Default::default(),
}
}

View File

@@ -401,13 +401,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Invalid flow request body: {:?}", body))]
InvalidFlowRequestBody {
body: Box<Option<api::v1::flow::flow_request::Body>>,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to get kv cache, err: {}", err_msg))]
GetKvCache { err_msg: String },
@@ -790,6 +783,76 @@ pub enum Error {
#[snafu(source)]
source: common_procedure::error::Error,
},
#[snafu(display("Invalid file path: {}", file_path))]
InvalidFilePath {
#[snafu(implicit)]
location: Location,
file_path: String,
},
#[snafu(display("Failed to serialize flexbuffers"))]
SerializeFlexbuffers {
#[snafu(implicit)]
location: Location,
#[snafu(source)]
error: flexbuffers::SerializationError,
},
#[snafu(display("Failed to deserialize flexbuffers"))]
DeserializeFlexbuffers {
#[snafu(implicit)]
location: Location,
#[snafu(source)]
error: flexbuffers::DeserializationError,
},
#[snafu(display("Failed to read flexbuffers"))]
ReadFlexbuffers {
#[snafu(implicit)]
location: Location,
#[snafu(source)]
error: flexbuffers::ReaderError,
},
#[snafu(display("Invalid file name: {}", reason))]
InvalidFileName {
#[snafu(implicit)]
location: Location,
reason: String,
},
#[snafu(display("Invalid file extension: {}", reason))]
InvalidFileExtension {
#[snafu(implicit)]
location: Location,
reason: String,
},
#[snafu(display("Invalid file context: {}", reason))]
InvalidFileContext {
#[snafu(implicit)]
location: Location,
reason: String,
},
#[snafu(display("Failed to write object, file path: {}", file_path))]
WriteObject {
#[snafu(implicit)]
location: Location,
file_path: String,
#[snafu(source)]
error: object_store::Error,
},
#[snafu(display("Failed to read object, file path: {}", file_path))]
ReadObject {
#[snafu(implicit)]
location: Location,
file_path: String,
#[snafu(source)]
error: object_store::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -808,6 +871,8 @@ impl ErrorExt for Error {
| SerializeToJson { .. }
| DeserializeFromJson { .. } => StatusCode::Internal,
WriteObject { .. } | ReadObject { .. } => StatusCode::StorageUnavailable,
NoLeader { .. } => StatusCode::TableUnavailable,
ValueNotExist { .. } | ProcedurePoisonConflict { .. } => StatusCode::Unexpected,
@@ -844,7 +909,11 @@ impl ErrorExt for Error {
| ProcedureOutput { .. }
| FromUtf8 { .. }
| MetadataCorruption { .. }
| ParseWalOptions { .. } => StatusCode::Unexpected,
| ParseWalOptions { .. }
| ReadFlexbuffers { .. }
| SerializeFlexbuffers { .. }
| DeserializeFlexbuffers { .. }
| InvalidFileContext { .. } => StatusCode::Unexpected,
SendMessage { .. } | GetKvCache { .. } | CacheNotGet { .. } => StatusCode::Internal,
@@ -861,7 +930,9 @@ impl ErrorExt for Error {
| InvalidSetDatabaseOption { .. }
| InvalidUnsetDatabaseOption { .. }
| InvalidTopicNamePrefix { .. }
| InvalidFlowRequestBody { .. } => StatusCode::InvalidArguments,
| InvalidFileExtension { .. }
| InvalidFileName { .. }
| InvalidFilePath { .. } => StatusCode::InvalidArguments,
FlowNotFound { .. } => StatusCode::FlowNotFound,
FlowRouteNotFound { .. } => StatusCode::Unexpected,

View File

@@ -43,6 +43,7 @@ pub mod region_keeper;
pub mod region_registry;
pub mod rpc;
pub mod sequence;
pub mod snapshot;
pub mod state_store;
#[cfg(any(test, feature = "testing"))]
pub mod test_util;

View File

@@ -0,0 +1,365 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod file;
use std::fmt::{Display, Formatter};
use std::time::Instant;
use common_telemetry::info;
use file::{Metadata, MetadataContent};
use futures::TryStreamExt;
use object_store::ObjectStore;
use snafu::{OptionExt, ResultExt};
use strum::Display;
use crate::error::{
Error, InvalidFileExtensionSnafu, InvalidFileNameSnafu, InvalidFilePathSnafu, ReadObjectSnafu,
Result, WriteObjectSnafu,
};
use crate::kv_backend::KvBackendRef;
use crate::range_stream::{PaginationStream, DEFAULT_PAGE_SIZE};
use crate::rpc::store::{BatchPutRequest, RangeRequest};
use crate::rpc::KeyValue;
use crate::snapshot::file::{Document, KeyValue as FileKeyValue};
/// The format of the backup file.
#[derive(Debug, PartialEq, Eq, Display, Clone, Copy)]
pub enum FileFormat {
#[strum(serialize = "fb")]
FlexBuffers,
}
impl TryFrom<&str> for FileFormat {
type Error = String;
fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
match value.to_lowercase().as_str() {
"fb" => Ok(FileFormat::FlexBuffers),
_ => Err(format!("Invalid file format: {}", value)),
}
}
}
#[derive(Debug, PartialEq, Eq, Display)]
#[strum(serialize_all = "lowercase")]
pub enum DataType {
Metadata,
}
impl TryFrom<&str> for DataType {
type Error = String;
fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
match value.to_lowercase().as_str() {
"metadata" => Ok(DataType::Metadata),
_ => Err(format!("Invalid data type: {}", value)),
}
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct FileExtension {
format: FileFormat,
data_type: DataType,
}
impl FileExtension {
pub fn new(format: FileFormat, data_type: DataType) -> Self {
Self { format, data_type }
}
}
impl Display for FileExtension {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}.{}", self.data_type, self.format)
}
}
impl TryFrom<&str> for FileExtension {
type Error = Error;
fn try_from(value: &str) -> Result<Self> {
let parts = value.split(".").collect::<Vec<&str>>();
if parts.len() != 2 {
return InvalidFileExtensionSnafu {
reason: format!(
"Extension should be in the format of <datatype>.<format>, got: {}",
value
),
}
.fail();
}
let data_type = DataType::try_from(parts[0])
.map_err(|e| InvalidFileExtensionSnafu { reason: e }.build())?;
let format = FileFormat::try_from(parts[1])
.map_err(|e| InvalidFileExtensionSnafu { reason: e }.build())?;
Ok(FileExtension { format, data_type })
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct FileName {
name: String,
extension: FileExtension,
}
impl Display for FileName {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}.{}", self.name, self.extension)
}
}
impl TryFrom<&str> for FileName {
type Error = Error;
fn try_from(value: &str) -> Result<Self> {
let Some((name, extension)) = value.split_once(".") else {
return InvalidFileNameSnafu {
reason: format!(
"The file name should be in the format of <name>.<extension>, got: {}",
value
),
}
.fail();
};
let extension = FileExtension::try_from(extension)?;
Ok(Self {
name: name.to_string(),
extension,
})
}
}
impl FileName {
fn new(name: String, extension: FileExtension) -> Self {
Self { name, extension }
}
}
/// The manager of the metadata snapshot.
///
/// It manages the metadata snapshot, including dumping and restoring.
pub struct MetadataSnapshotManager {
kv_backend: KvBackendRef,
object_store: ObjectStore,
}
/// The maximum size of a request to put metadata, 1 MiB by default.
const MAX_REQUEST_SIZE: usize = 1024 * 1024;
impl MetadataSnapshotManager {
pub fn new(kv_backend: KvBackendRef, object_store: ObjectStore) -> Self {
Self {
kv_backend,
object_store,
}
}
/// Restores the metadata from the backup file to the metadata store.
pub async fn restore(&self, file_path: &str) -> Result<u64> {
let filename = FileName::try_from(
file_path
.rsplit("/")
.next()
.context(InvalidFilePathSnafu { file_path })?,
)?;
let data = self
.object_store
.read(file_path)
.await
.context(ReadObjectSnafu { file_path })?;
let document = Document::from_slice(&filename.extension.format, &data.to_bytes())?;
let metadata_content = document.into_metadata_content()?;
let mut req = BatchPutRequest::default();
let mut total_request_size = 0;
let mut count = 0;
let now = Instant::now();
for FileKeyValue { key, value } in metadata_content.into_iter() {
count += 1;
let key_size = key.len();
let value_size = value.len();
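// Flush the accumulated batch before adding this pair would push it past MAX_REQUEST_SIZE.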
if total_request_size + key_size + value_size > MAX_REQUEST_SIZE {
self.kv_backend.batch_put(req).await?;
req = BatchPutRequest::default();
total_request_size = 0;
}
req.kvs.push(KeyValue { key, value });
total_request_size += key_size + value_size;
}
if !req.kvs.is_empty() {
self.kv_backend.batch_put(req).await?;
}
info!(
"Restored metadata from {} successfully, total {} key-value pairs, elapsed {:?}",
file_path,
count,
now.elapsed()
);
Ok(count)
}
/// Dumps the metadata to the backup file.
pub async fn dump(&self, path: &str, filename: &str) -> Result<(String, u64)> {
let format = FileFormat::FlexBuffers;
let filename = FileName::new(
filename.to_string(),
FileExtension {
format,
data_type: DataType::Metadata,
},
);
let file_path = format!("{}/{}", path.trim_end_matches('/'), filename);
let now = Instant::now();
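// An etcd-style range of key [0x00] with range end [0x00] scans the entire metadata keyspace.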
let req = RangeRequest::new().with_range(vec![0], vec![0]);
let stream = PaginationStream::new(self.kv_backend.clone(), req, DEFAULT_PAGE_SIZE, |kv| {
Ok(FileKeyValue {
key: kv.key,
value: kv.value,
})
})
.into_stream();
let keyvalues = stream.try_collect::<Vec<_>>().await?;
let num_keyvalues = keyvalues.len();
let document = Document::new(
Metadata::new(),
file::Content::Metadata(MetadataContent::new(keyvalues)),
);
let bytes = document.to_bytes(&format)?;
let r = self
.object_store
.write(&file_path, bytes)
.await
.context(WriteObjectSnafu {
file_path: &file_path,
})?;
info!(
"Dumped metadata to {} successfully, total {} key-value pairs, file size {} bytes, elapsed {:?}",
file_path,
num_keyvalues,
r.content_length(),
now.elapsed()
);
Ok((filename.to_string(), num_keyvalues as u64))
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use std::sync::Arc;
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use object_store::services::Fs;
use super::*;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::kv_backend::KvBackend;
use crate::rpc::store::PutRequest;
#[test]
fn test_file_name() {
let file_name = FileName::try_from("test.metadata.fb").unwrap();
assert_eq!(file_name.name, "test");
assert_eq!(file_name.extension.format, FileFormat::FlexBuffers);
assert_eq!(file_name.extension.data_type, DataType::Metadata);
assert_eq!(file_name.to_string(), "test.metadata.fb");
let invalid_file_name = FileName::try_from("test.metadata").unwrap_err();
assert_eq!(
invalid_file_name.to_string(),
"Invalid file extension: Extension should be in the format of <datatype>.<format>, got: metadata"
);
let invalid_file_extension = FileName::try_from("test.metadata.hello").unwrap_err();
assert_eq!(
invalid_file_extension.to_string(),
"Invalid file extension: Invalid file format: hello"
);
}
fn test_env(
prefix: &str,
) -> (
TempDir,
Arc<MemoryKvBackend<Error>>,
MetadataSnapshotManager,
) {
let temp_dir = create_temp_dir(prefix);
let kv_backend = Arc::new(MemoryKvBackend::default());
let temp_path = temp_dir.path();
let data_path = temp_path.join("data").as_path().display().to_string();
let builder = Fs::default().root(&data_path);
let object_store = ObjectStore::new(builder).unwrap().finish();
let manager = MetadataSnapshotManager::new(kv_backend.clone(), object_store);
(temp_dir, kv_backend, manager)
}
#[tokio::test]
async fn test_dump_and_restore() {
common_telemetry::init_default_ut_logging();
let (temp_dir, kv_backend, manager) = test_env("test_dump_and_restore");
let temp_path = temp_dir.path();
for i in 0..10 {
kv_backend
.put(
PutRequest::new()
.with_key(format!("test_{}", i).as_bytes().to_vec())
.with_value(format!("value_{}", i).as_bytes().to_vec()),
)
.await
.unwrap();
}
let dump_path = temp_path.join("snapshot");
manager
.dump(
&dump_path.as_path().display().to_string(),
"metadata_snapshot",
)
.await
.unwrap();
// Clean up the kv backend
kv_backend.clear();
let restore_path = dump_path
.join("metadata_snapshot.metadata.fb")
.as_path()
.display()
.to_string();
manager.restore(&restore_path).await.unwrap();
for i in 0..10 {
let key = format!("test_{}", i);
let value = kv_backend.get(key.as_bytes()).await.unwrap().unwrap();
assert_eq!(value.value, format!("value_{}", i).as_bytes());
}
}
#[tokio::test]
async fn test_restore_from_nonexistent_file() {
let (temp_dir, _kv_backend, manager) = test_env("test_restore_from_nonexistent_file");
let restore_path = temp_dir
.path()
.join("nonexistent.metadata.fb")
.as_path()
.display()
.to_string();
let err = manager.restore(&restore_path).await.unwrap_err();
assert_matches!(err, Error::ReadObject { .. })
}
}

View File

@@ -0,0 +1,145 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_time::util::current_time_millis;
use flexbuffers::{FlexbufferSerializer, Reader};
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use crate::error::{
DeserializeFlexbuffersSnafu, ReadFlexbuffersSnafu, Result, SerializeFlexbuffersSnafu,
};
use crate::snapshot::FileFormat;
/// The layout of the backup file.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub(crate) struct Document {
metadata: Metadata,
content: Content,
}
impl Document {
/// Creates a new document.
pub fn new(metadata: Metadata, content: Content) -> Self {
Self { metadata, content }
}
fn serialize_to_flexbuffer(&self) -> Result<Vec<u8>> {
let mut builder = FlexbufferSerializer::new();
self.serialize(&mut builder)
.context(SerializeFlexbuffersSnafu)?;
Ok(builder.take_buffer())
}
/// Converts the [`Document`] to a bytes.
pub(crate) fn to_bytes(&self, format: &FileFormat) -> Result<Vec<u8>> {
match format {
FileFormat::FlexBuffers => self.serialize_to_flexbuffer(),
}
}
fn deserialize_from_flexbuffer(data: &[u8]) -> Result<Self> {
let reader = Reader::get_root(data).context(ReadFlexbuffersSnafu)?;
Document::deserialize(reader).context(DeserializeFlexbuffersSnafu)
}
/// Deserializes the [`Document`] from a bytes.
pub(crate) fn from_slice(format: &FileFormat, data: &[u8]) -> Result<Self> {
match format {
FileFormat::FlexBuffers => Self::deserialize_from_flexbuffer(data),
}
}
/// Converts the [`Document`] to a [`MetadataContent`].
pub(crate) fn into_metadata_content(self) -> Result<MetadataContent> {
match self.content {
Content::Metadata(metadata) => Ok(metadata),
}
}
}
/// The metadata of the backup file.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub(crate) struct Metadata {
// Milliseconds since UNIX_EPOCH.
created_timestamp_mills: i64,
}
impl Metadata {
/// Create a new metadata.
///
/// The `created_timestamp_mills` will be the current time in milliseconds.
pub fn new() -> Self {
Self {
created_timestamp_mills: current_time_millis(),
}
}
}
/// The content of the backup file.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub(crate) enum Content {
Metadata(MetadataContent),
}
/// The content of the backup file.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub(crate) struct MetadataContent {
values: Vec<KeyValue>,
}
impl MetadataContent {
/// Create a new metadata content.
pub fn new(values: impl IntoIterator<Item = KeyValue>) -> Self {
Self {
values: values.into_iter().collect(),
}
}
/// Returns an iterator over the key-value pairs.
pub fn into_iter(self) -> impl Iterator<Item = KeyValue> {
self.values.into_iter()
}
}
/// The key-value pair of the backup file.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub(crate) struct KeyValue {
pub key: Vec<u8>,
pub value: Vec<u8>,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_document() {
let document = Document::new(
Metadata::new(),
Content::Metadata(MetadataContent::new(vec![KeyValue {
key: b"key".to_vec(),
value: b"value".to_vec(),
}])),
);
let bytes = document.to_bytes(&FileFormat::FlexBuffers).unwrap();
let document_deserialized = Document::from_slice(&FileFormat::FlexBuffers, &bytes).unwrap();
assert_eq!(
document.metadata.created_timestamp_mills,
document_deserialized.metadata.created_timestamp_mills
);
assert_eq!(document.content, document_deserialized.content);
}
}

View File

@@ -162,6 +162,13 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to do metadata snapshot"))]
MetadataSnapshot {
source: BoxedError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to do procedure task"))]
ProcedureService {
source: BoxedError,
@@ -187,6 +194,12 @@ pub enum Error {
location: Location,
},
#[snafu(display("Missing MetadataSnapshotHandler, not expected"))]
MissingMetadataSnapshotHandler {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid function args: {}", err_msg))]
InvalidFuncArgs {
err_msg: String,
@@ -251,6 +264,7 @@ impl ErrorExt for Error {
Error::MissingTableMutationHandler { .. }
| Error::MissingProcedureServiceHandler { .. }
| Error::MissingFlowServiceHandler { .. }
| Error::MissingMetadataSnapshotHandler { .. }
| Error::RegisterUdf { .. } => StatusCode::Unexpected,
Error::UnsupportedInputDataType { .. }
@@ -262,7 +276,8 @@ impl ErrorExt for Error {
Error::DecodePlan { source, .. }
| Error::Execute { source, .. }
| Error::ProcedureService { source, .. }
| Error::TableMutation { source, .. } => source.status_code(),
| Error::TableMutation { source, .. }
| Error::MetadataSnapshot { source, .. } => source.status_code(),
Error::PermissionDenied { .. } => StatusCode::PermissionDenied,
}

View File

@@ -18,19 +18,16 @@ mod udaf;
use std::sync::Arc;
use api::v1::TableName;
use datafusion::catalog::CatalogProviderList;
use datafusion::error::Result as DatafusionResult;
use datafusion::logical_expr::{LogicalPlan, LogicalPlanBuilder};
use datafusion_common::{Column, TableReference};
use datafusion_expr::dml::InsertOp;
use datafusion_expr::{col, DmlStatement, WriteOp};
use datafusion_common::Column;
use datafusion_expr::col;
pub use expr::{build_filter_from_timestamp, build_same_type_ts_filter};
use snafu::ResultExt;
pub use self::accumulator::{Accumulator, AggregateFunctionCreator, AggregateFunctionCreatorRef};
pub use self::udaf::AggregateFunction;
use crate::error::{GeneralDataFusionSnafu, Result};
use crate::error::Result;
use crate::logical_plan::accumulator::*;
use crate::signature::{Signature, Volatility};
@@ -82,74 +79,6 @@ pub fn rename_logical_plan_columns(
LogicalPlanBuilder::from(plan).project(projection)?.build()
}
/// Convert a insert into logical plan to an (table_name, logical_plan)
/// where table_name is the name of the table to insert into.
/// logical_plan is the plan to be executed.
///
/// if input logical plan is not `insert into table_name <input>`, return None
///
/// Returned TableName will use provided catalog and schema if not specified in the logical plan,
/// if table scan in logical plan have full table name, will **NOT** override it.
pub fn breakup_insert_plan(
plan: &LogicalPlan,
default_catalog: &str,
default_schema: &str,
) -> Option<(TableName, Arc<LogicalPlan>)> {
if let LogicalPlan::Dml(dml) = plan {
if dml.op != WriteOp::Insert(InsertOp::Append) {
return None;
}
let table_name = &dml.table_name;
let table_name = match table_name {
TableReference::Bare { table } => TableName {
catalog_name: default_catalog.to_string(),
schema_name: default_schema.to_string(),
table_name: table.to_string(),
},
TableReference::Partial { schema, table } => TableName {
catalog_name: default_catalog.to_string(),
schema_name: schema.to_string(),
table_name: table.to_string(),
},
TableReference::Full {
catalog,
schema,
table,
} => TableName {
catalog_name: catalog.to_string(),
schema_name: schema.to_string(),
table_name: table.to_string(),
},
};
let logical_plan = dml.input.clone();
Some((table_name, logical_plan))
} else {
None
}
}
/// create a `insert into table_name <input>` logical plan
pub fn add_insert_to_logical_plan(
table_name: TableName,
table_schema: datafusion_common::DFSchemaRef,
input: LogicalPlan,
) -> Result<LogicalPlan> {
let table_name = TableReference::Full {
catalog: table_name.catalog_name.into(),
schema: table_name.schema_name.into(),
table: table_name.table_name.into(),
};
let plan = LogicalPlan::Dml(DmlStatement::new(
table_name,
table_schema,
WriteOp::Insert(InsertOp::Append),
Arc::new(input),
));
let plan = plan.recompute_schema().context(GeneralDataFusionSnafu)?;
Ok(plan)
}
/// The datafusion `[LogicalPlan]` decoder.
#[async_trait::async_trait]
pub trait SubstraitPlanDecoder {

View File

@@ -30,10 +30,10 @@ pub const DEFAULT_BACKOFF_CONFIG: BackoffConfig = BackoffConfig {
deadline: Some(Duration::from_secs(120)),
};
/// Default interval for auto WAL pruning.
pub const DEFAULT_AUTO_PRUNE_INTERVAL: Duration = Duration::ZERO;
/// Default limit for concurrent auto pruning tasks.
pub const DEFAULT_AUTO_PRUNE_PARALLELISM: usize = 10;
/// Default interval for active WAL pruning.
pub const DEFAULT_ACTIVE_PRUNE_INTERVAL: Duration = Duration::ZERO;
/// Default limit for concurrent active pruning tasks.
pub const DEFAULT_ACTIVE_PRUNE_TASK_LIMIT: usize = 10;
/// Default interval for sending flush request to regions when pruning remote WAL.
pub const DEFAULT_TRIGGER_FLUSH_THRESHOLD: u64 = 0;

View File

@@ -18,8 +18,8 @@ use common_base::readable_size::ReadableSize;
use serde::{Deserialize, Serialize};
use crate::config::kafka::common::{
KafkaConnectionConfig, KafkaTopicConfig, DEFAULT_AUTO_PRUNE_INTERVAL,
DEFAULT_AUTO_PRUNE_PARALLELISM, DEFAULT_TRIGGER_FLUSH_THRESHOLD,
KafkaConnectionConfig, KafkaTopicConfig, DEFAULT_ACTIVE_PRUNE_INTERVAL,
DEFAULT_ACTIVE_PRUNE_TASK_LIMIT, DEFAULT_TRIGGER_FLUSH_THRESHOLD,
};
/// Kafka wal configurations for datanode.
@@ -47,8 +47,9 @@ pub struct DatanodeKafkaConfig {
pub dump_index_interval: Duration,
/// Ignore missing entries during read WAL.
pub overwrite_entry_start_id: bool,
// Active WAL pruning.
pub auto_prune_topic_records: bool,
// Interval of WAL pruning.
#[serde(with = "humantime_serde")]
pub auto_prune_interval: Duration,
// Threshold for sending flush request when pruning remote WAL.
// `None` stands for never sending flush request.
@@ -69,9 +70,10 @@ impl Default for DatanodeKafkaConfig {
create_index: true,
dump_index_interval: Duration::from_secs(60),
overwrite_entry_start_id: false,
auto_prune_interval: DEFAULT_AUTO_PRUNE_INTERVAL,
auto_prune_topic_records: false,
auto_prune_interval: DEFAULT_ACTIVE_PRUNE_INTERVAL,
trigger_flush_threshold: DEFAULT_TRIGGER_FLUSH_THRESHOLD,
auto_prune_parallelism: DEFAULT_AUTO_PRUNE_PARALLELISM,
auto_prune_parallelism: DEFAULT_ACTIVE_PRUNE_TASK_LIMIT,
}
}
}

View File

@@ -17,8 +17,8 @@ use std::time::Duration;
use serde::{Deserialize, Serialize};
use crate::config::kafka::common::{
KafkaConnectionConfig, KafkaTopicConfig, DEFAULT_AUTO_PRUNE_INTERVAL,
DEFAULT_AUTO_PRUNE_PARALLELISM, DEFAULT_TRIGGER_FLUSH_THRESHOLD,
KafkaConnectionConfig, KafkaTopicConfig, DEFAULT_ACTIVE_PRUNE_INTERVAL,
DEFAULT_ACTIVE_PRUNE_TASK_LIMIT, DEFAULT_TRIGGER_FLUSH_THRESHOLD,
};
/// Kafka wal configurations for metasrv.
@@ -34,7 +34,6 @@ pub struct MetasrvKafkaConfig {
// Automatically create topics for WAL.
pub auto_create_topics: bool,
// Interval of WAL pruning.
#[serde(with = "humantime_serde")]
pub auto_prune_interval: Duration,
// Threshold for sending flush request when pruning remote WAL.
// `None` stands for never sending flush request.
@@ -49,9 +48,9 @@ impl Default for MetasrvKafkaConfig {
connection: Default::default(),
kafka_topic: Default::default(),
auto_create_topics: true,
auto_prune_interval: DEFAULT_AUTO_PRUNE_INTERVAL,
auto_prune_interval: DEFAULT_ACTIVE_PRUNE_INTERVAL,
trigger_flush_threshold: DEFAULT_TRIGGER_FLUSH_THRESHOLD,
auto_prune_parallelism: DEFAULT_AUTO_PRUNE_PARALLELISM,
auto_prune_parallelism: DEFAULT_ACTIVE_PRUNE_TASK_LIMIT,
}
}
}

View File

@@ -357,6 +357,7 @@ impl DatanodeBuilder {
None,
None,
None,
None,
false,
self.plugins.clone(),
opts.query.clone(),

View File

@@ -58,7 +58,7 @@ use crate::metrics::{METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_ROWS, METRIC_FLOW_R
use crate::repr::{self, DiffRow, RelationDesc, Row, BATCH_SIZE};
use crate::{CreateFlowArgs, FlowId, TableName};
pub(crate) mod flownode_impl;
mod flownode_impl;
mod parse_expr;
pub(crate) mod refill;
mod stat;
@@ -135,14 +135,12 @@ impl Configurable for FlownodeOptions {
}
/// Arc-ed FlowNodeManager, cheaper to clone
pub type FlowWorkerManagerRef = Arc<FlowStreamingEngine>;
pub type FlowWorkerManagerRef = Arc<FlowWorkerManager>;
/// FlowNodeManager manages the state of all tasks in the flow node, which should be run on the same thread
///
/// The choice of timestamp is just using current system timestamp for now
///
/// TODO(discord9): rename to FlowStreamingEngine
pub struct FlowStreamingEngine {
pub struct FlowWorkerManager {
/// The handler to the worker that will run the dataflow
/// which is `!Send` so a handle is used
pub worker_handles: Vec<WorkerHandle>,
@@ -160,8 +158,7 @@ pub struct FlowStreamingEngine {
flow_err_collectors: RwLock<BTreeMap<FlowId, ErrCollector>>,
src_send_buf_lens: RwLock<BTreeMap<TableId, watch::Receiver<usize>>>,
tick_manager: FlowTickManager,
/// This node id is only available in distributed mode, on standalone mode this is guaranteed to be `None`
pub node_id: Option<u32>,
node_id: Option<u32>,
/// Lock for flushing, will be `read` by `handle_inserts` and `write` by `flush_flow`
///
/// So that a series of event like `inserts -> flush` can be handled correctly
@@ -171,7 +168,7 @@ pub struct FlowStreamingEngine {
}
/// Building FlownodeManager
impl FlowStreamingEngine {
impl FlowWorkerManager {
/// set frontend invoker
pub async fn set_frontend_invoker(&self, frontend: FrontendInvoker) {
*self.frontend_invoker.write().await = Some(frontend);
@@ -190,7 +187,7 @@ impl FlowStreamingEngine {
let node_context = FlownodeContext::new(Box::new(srv_map.clone()) as _);
let tick_manager = FlowTickManager::new();
let worker_handles = Vec::new();
FlowStreamingEngine {
FlowWorkerManager {
worker_handles,
worker_selector: Mutex::new(0),
query_engine,
@@ -266,7 +263,7 @@ pub fn batches_to_rows_req(batches: Vec<Batch>) -> Result<Vec<DiffRequest>, Erro
}
/// This impl block contains methods to send writeback requests to frontend
impl FlowStreamingEngine {
impl FlowWorkerManager {
/// Return the number of requests it made
pub async fn send_writeback_requests(&self) -> Result<usize, Error> {
let all_reqs = self.generate_writeback_request().await?;
@@ -537,7 +534,7 @@ impl FlowStreamingEngine {
}
/// Flow Runtime related methods
impl FlowStreamingEngine {
impl FlowWorkerManager {
/// Start state report handler, which will receive a sender from HeartbeatTask to send state size report back
///
/// if heartbeat task is shutdown, this future will exit too
@@ -731,7 +728,7 @@ impl FlowStreamingEngine {
}
/// Create&Remove flow
impl FlowStreamingEngine {
impl FlowWorkerManager {
/// remove a flow by its id
pub async fn remove_flow_inner(&self, flow_id: FlowId) -> Result<(), Error> {
for handle in self.worker_handles.iter() {

View File

@@ -20,379 +20,35 @@ use api::v1::flow::{
flow_request, CreateRequest, DropRequest, FlowRequest, FlowResponse, FlushFlow,
};
use api::v1::region::InsertRequests;
use catalog::CatalogManager;
use common_error::ext::BoxedError;
use common_meta::ddl::create_flow::FlowType;
use common_meta::error::Result as MetaResult;
use common_meta::key::flow::FlowMetadataManager;
use common_meta::error::{Result as MetaResult, UnexpectedSnafu};
use common_runtime::JoinHandle;
use common_telemetry::{error, info, trace, warn};
use common_telemetry::{trace, warn};
use datatypes::value::Value;
use futures::TryStreamExt;
use itertools::Itertools;
use session::context::QueryContextBuilder;
use snafu::{ensure, IntoError, OptionExt, ResultExt};
use snafu::{IntoError, OptionExt, ResultExt};
use store_api::storage::{RegionId, TableId};
use tokio::sync::{Mutex, RwLock};
use crate::adapter::{CreateFlowArgs, FlowStreamingEngine};
use crate::adapter::{CreateFlowArgs, FlowWorkerManager};
use crate::batching_mode::engine::BatchingEngine;
use crate::engine::FlowEngine;
use crate::error::{
CreateFlowSnafu, ExternalSnafu, FlowNotFoundSnafu, IllegalCheckTaskStateSnafu,
InsertIntoFlowSnafu, InternalSnafu, JoinTaskSnafu, ListFlowsSnafu, SyncCheckTaskSnafu,
UnexpectedSnafu,
};
use crate::error::{CreateFlowSnafu, FlowNotFoundSnafu, InsertIntoFlowSnafu, InternalSnafu};
use crate::metrics::METRIC_FLOW_TASK_COUNT;
use crate::repr::{self, DiffRow};
use crate::{Error, FlowId};
/// Ref to [`FlowDualEngine`]
pub type FlowDualEngineRef = Arc<FlowDualEngine>;
/// Manage both streaming and batching mode engine
///
/// including create/drop/flush flow
/// and redirect insert requests to the appropriate engine
pub struct FlowDualEngine {
streaming_engine: Arc<FlowStreamingEngine>,
streaming_engine: Arc<FlowWorkerManager>,
batching_engine: Arc<BatchingEngine>,
/// helper struct for faster query flow by table id or vice versa
src_table2flow: RwLock<SrcTableToFlow>,
flow_metadata_manager: Arc<FlowMetadataManager>,
catalog_manager: Arc<dyn CatalogManager>,
check_task: tokio::sync::Mutex<Option<ConsistentCheckTask>>,
src_table2flow: std::sync::RwLock<SrcTableToFlow>,
}
impl FlowDualEngine {
pub fn new(
streaming_engine: Arc<FlowStreamingEngine>,
batching_engine: Arc<BatchingEngine>,
flow_metadata_manager: Arc<FlowMetadataManager>,
catalog_manager: Arc<dyn CatalogManager>,
) -> Self {
Self {
streaming_engine,
batching_engine,
src_table2flow: RwLock::new(SrcTableToFlow::default()),
flow_metadata_manager,
catalog_manager,
check_task: Mutex::new(None),
}
}
pub fn streaming_engine(&self) -> Arc<FlowStreamingEngine> {
self.streaming_engine.clone()
}
pub fn batching_engine(&self) -> Arc<BatchingEngine> {
self.batching_engine.clone()
}
/// Try to sync with check task, this is only used in drop flow&flush flow, so a flow id is required
///
/// the need to sync is to make sure flush flow actually get called
async fn try_sync_with_check_task(
&self,
flow_id: FlowId,
allow_drop: bool,
) -> Result<(), Error> {
// this function rarely get called so adding some log is helpful
info!("Try to sync with check task for flow {}", flow_id);
let mut retry = 0;
let max_retry = 10;
// keep trying to trigger consistent check
while retry < max_retry {
if let Some(task) = self.check_task.lock().await.as_ref() {
task.trigger(false, allow_drop).await?;
break;
}
retry += 1;
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
}
if retry == max_retry {
error!(
"Can't sync with check task for flow {} with allow_drop={}",
flow_id, allow_drop
);
return SyncCheckTaskSnafu {
flow_id,
allow_drop,
}
.fail();
}
info!("Successfully sync with check task for flow {}", flow_id);
Ok(())
}
/// Spawn a task to consistently check that all flow tasks in metasrv are created on this flownode,
/// so on startup it will create all missing flow tasks and keep checking at an interval
async fn check_flow_consistent(
&self,
allow_create: bool,
allow_drop: bool,
) -> Result<(), Error> {
// use nodeid to determine if this is standalone/distributed mode, and retrieve all flows in this node(in distributed mode)/or all flows(in standalone mode)
let nodeid = self.streaming_engine.node_id;
let should_exists: Vec<_> = if let Some(nodeid) = nodeid {
// nodeid is available, so we only need to check flows on this node
// which also means we are in distributed mode
let to_be_recover = self
.flow_metadata_manager
.flownode_flow_manager()
.flows(nodeid.into())
.try_collect::<Vec<_>>()
.await
.context(ListFlowsSnafu {
id: Some(nodeid.into()),
})?;
to_be_recover.into_iter().map(|(id, _)| id).collect()
} else {
// nodeid is not available, so we need to check all flows
// which also means we are in standalone mode
let all_catalogs = self
.catalog_manager
.catalog_names()
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let mut all_flow_ids = vec![];
for catalog in all_catalogs {
let flows = self
.flow_metadata_manager
.flow_name_manager()
.flow_names(&catalog)
.await
.try_collect::<Vec<_>>()
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
all_flow_ids.extend(flows.into_iter().map(|(_, id)| id.flow_id()));
}
all_flow_ids
};
let should_exists = should_exists
.into_iter()
.map(|i| i as FlowId)
.collect::<HashSet<_>>();
let actual_exists = self.list_flows().await?.into_iter().collect::<HashSet<_>>();
let to_be_created = should_exists
.iter()
.filter(|id| !actual_exists.contains(id))
.collect::<Vec<_>>();
let to_be_dropped = actual_exists
.iter()
.filter(|id| !should_exists.contains(id))
.collect::<Vec<_>>();
if !to_be_created.is_empty() {
if allow_create {
info!(
"Recovering {} flows: {:?}",
to_be_created.len(),
to_be_created
);
let mut errors = vec![];
for flow_id in to_be_created {
let flow_id = *flow_id;
let info = self
.flow_metadata_manager
.flow_info_manager()
.get(flow_id as u32)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
.context(FlowNotFoundSnafu { id: flow_id })?;
let sink_table_name = [
info.sink_table_name().catalog_name.clone(),
info.sink_table_name().schema_name.clone(),
info.sink_table_name().table_name.clone(),
];
let args = CreateFlowArgs {
flow_id,
sink_table_name,
source_table_ids: info.source_table_ids().to_vec(),
// because recovery should only happen on restart, `create_if_not_exists` and `or_replace` can be arbitrary values (since the flow doesn't exist yet)
// but for the sake of consistency, and to make sure recovery of the flow actually happens, we set both to true
// (which is also fine since the check that disallows both being true is on metasrv and we already passed it)
create_if_not_exists: true,
or_replace: true,
expire_after: info.expire_after(),
comment: Some(info.comment().clone()),
sql: info.raw_sql().clone(),
flow_options: info.options().clone(),
query_ctx: Some(
QueryContextBuilder::default()
.current_catalog(info.catalog_name().clone())
.build(),
),
};
if let Err(err) = self
.create_flow(args)
.await
.map_err(BoxedError::new)
.with_context(|_| CreateFlowSnafu {
sql: info.raw_sql().clone(),
})
{
errors.push((flow_id, err));
}
}
for (flow_id, err) in errors {
warn!("Failed to recreate flow {}, err={:#?}", flow_id, err);
}
} else {
warn!(
"Flownode {:?} found flows not exist in flownode, flow_ids={:?}",
nodeid, to_be_created
);
}
}
if !to_be_dropped.is_empty() {
if allow_drop {
info!("Dropping flows: {:?}", to_be_dropped);
let mut errors = vec![];
for flow_id in to_be_dropped {
let flow_id = *flow_id;
if let Err(err) = self.remove_flow(flow_id).await {
errors.push((flow_id, err));
}
}
for (flow_id, err) in errors {
warn!("Failed to drop flow {}, err={:#?}", flow_id, err);
}
} else {
warn!(
"Flownode {:?} found flows not exist in flownode, flow_ids={:?}",
nodeid, to_be_dropped
);
}
}
Ok(())
}
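As a distilled illustration of the reconciliation above (not the actual implementation): the flows recorded in metadata and the flows actually running are treated as two sets, and the two set differences drive creation and removal. A minimal sketch with plain HashSets, using u64 as a stand-in for FlowId:
use std::collections::HashSet;
fn reconcile(should_exist: &HashSet<u64>, actually_exist: &HashSet<u64>) -> (Vec<u64>, Vec<u64>) {
    // flows present in metadata but missing on this flownode -> recreate them
    let to_be_created = should_exist.difference(actually_exist).copied().collect();
    // flows running on this flownode but gone from metadata -> drop them
    let to_be_dropped = actually_exist.difference(should_exist).copied().collect();
    (to_be_created, to_be_dropped)
}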
// TODO(discord9): consider syncing this with heartbeat (might become necessary in the future)
pub async fn start_flow_consistent_check_task(self: &Arc<Self>) -> Result<(), Error> {
let mut check_task = self.check_task.lock().await;
ensure!(
check_task.is_none(),
IllegalCheckTaskStateSnafu {
reason: "Flow consistent check task already exists",
}
);
let task = ConsistentCheckTask::start_check_task(self).await?;
*check_task = Some(task);
Ok(())
}
pub async fn stop_flow_consistent_check_task(&self) -> Result<(), Error> {
info!("Stopping flow consistent check task");
let mut check_task = self.check_task.lock().await;
ensure!(
check_task.is_some(),
IllegalCheckTaskStateSnafu {
reason: "Flow consistent check task does not exist",
}
);
check_task.take().expect("Already checked").stop().await?;
info!("Stopped flow consistent check task");
Ok(())
}
async fn flow_exist_in_metadata(&self, flow_id: FlowId) -> Result<bool, Error> {
self.flow_metadata_manager
.flow_info_manager()
.get(flow_id as u32)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)
.map(|info| info.is_some())
}
}
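For orientation, a hedged usage sketch of the public surface shown in this diff (the prerequisite handles are placeholders and are not constructed here; the `FlowStreamingEngine` naming follows one side of this diff):
async fn start_dual_engine(
    streaming: Arc<FlowStreamingEngine>,
    batching: Arc<BatchingEngine>,
    flow_metadata_manager: Arc<FlowMetadataManager>,
    catalog_manager: Arc<dyn CatalogManager>,
) -> Result<FlowDualEngineRef, Error> {
    let engine = Arc::new(FlowDualEngine::new(
        streaming,
        batching,
        flow_metadata_manager,
        catalog_manager,
    ));
    // recovers missing flows once, then keeps checking in the background
    engine.start_flow_consistent_check_task().await?;
    Ok(engine)
}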
struct ConsistentCheckTask {
handle: JoinHandle<()>,
shutdown_tx: tokio::sync::mpsc::Sender<()>,
trigger_tx: tokio::sync::mpsc::Sender<(bool, bool, tokio::sync::oneshot::Sender<()>)>,
}
impl ConsistentCheckTask {
async fn start_check_task(engine: &Arc<FlowDualEngine>) -> Result<Self, Error> {
// first, recover flows
engine.check_flow_consistent(true, false).await?;
let inner = engine.clone();
let (tx, mut rx) = tokio::sync::mpsc::channel(1);
let (trigger_tx, mut trigger_rx) =
tokio::sync::mpsc::channel::<(bool, bool, tokio::sync::oneshot::Sender<()>)>(10);
let handle = common_runtime::spawn_global(async move {
let mut args = (false, false);
let mut ret_signal: Option<tokio::sync::oneshot::Sender<()>> = None;
loop {
if let Err(err) = inner.check_flow_consistent(args.0, args.1).await {
error!(err; "Failed to check flow consistent");
}
if let Some(done) = ret_signal.take() {
let _ = done.send(());
}
tokio::select! {
_ = rx.recv() => break,
incoming = trigger_rx.recv() => if let Some(incoming) = incoming {
args = (incoming.0, incoming.1);
ret_signal = Some(incoming.2);
},
_ = tokio::time::sleep(std::time::Duration::from_secs(10)) => args=(false,false),
}
}
});
Ok(ConsistentCheckTask {
handle,
shutdown_tx: tx,
trigger_tx,
})
}
async fn trigger(&self, allow_create: bool, allow_drop: bool) -> Result<(), Error> {
let (tx, rx) = tokio::sync::oneshot::channel();
self.trigger_tx
.send((allow_create, allow_drop, tx))
.await
.map_err(|_| {
IllegalCheckTaskStateSnafu {
reason: "Failed to send trigger signal",
}
.build()
})?;
rx.await.map_err(|_| {
IllegalCheckTaskStateSnafu {
reason: "Failed to receive trigger signal",
}
.build()
})?;
Ok(())
}
async fn stop(self) -> Result<(), Error> {
self.shutdown_tx.send(()).await.map_err(|_| {
IllegalCheckTaskStateSnafu {
reason: "Failed to send shutdown signal",
}
.build()
})?;
// abort so no need to wait
self.handle.abort();
Ok(())
}
}
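The check task above combines an mpsc command channel with a oneshot acknowledgement so a caller can await one completed round. A minimal, self-contained sketch of that pattern (illustrative only, not the real task):
use tokio::sync::{mpsc, oneshot};
async fn trigger_ack_demo() {
    let (cmd_tx, mut cmd_rx) = mpsc::channel::<oneshot::Sender<()>>(8);
    tokio::spawn(async move {
        while let Some(done) = cmd_rx.recv().await {
            // ... run one round of the consistency check here ...
            let _ = done.send(()); // acknowledge the caller
        }
    });
    let (ack_tx, ack_rx) = oneshot::channel();
    cmd_tx.send(ack_tx).await.expect("worker is alive");
    ack_rx.await.expect("worker acknowledged"); // returns once the round finished
}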
#[derive(Default)]
struct SrcTableToFlow {
/// mapping of table ids to flow ids for streaming mode
stream: HashMap<TableId, HashSet<FlowId>>,
@@ -482,49 +138,35 @@ impl FlowEngine for FlowDualEngine {
self.src_table2flow
.write()
.await
.unwrap()
.add_flow(flow_id, flow_type, src_table_ids);
Ok(res)
}
async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
let flow_type = self.src_table2flow.read().await.get_flow_type(flow_id);
let flow_type = self.src_table2flow.read().unwrap().get_flow_type(flow_id);
match flow_type {
Some(FlowType::Batching) => self.batching_engine.remove_flow(flow_id).await,
Some(FlowType::Streaming) => self.streaming_engine.remove_flow(flow_id).await,
None => {
// this can happen if the flownode just restarted and is still creating the flow
// since this flow should now be dropped, we need to trigger the consistency check and allow dropping
// this relies on the drop-flow DDL deleting metadata first, see src/common/meta/src/ddl/drop_flow.rs
warn!(
"Flow {} is not exist in the underlying engine, but exist in metadata",
flow_id
);
self.try_sync_with_check_task(flow_id, true).await?;
Ok(())
}
None => FlowNotFoundSnafu { id: flow_id }.fail(),
}?;
// remove mapping
self.src_table2flow.write().await.remove_flow(flow_id);
self.src_table2flow.write().unwrap().remove_flow(flow_id);
Ok(())
}
async fn flush_flow(&self, flow_id: FlowId) -> Result<usize, Error> {
// sync with check task
self.try_sync_with_check_task(flow_id, false).await?;
let flow_type = self.src_table2flow.read().await.get_flow_type(flow_id);
let flow_type = self.src_table2flow.read().unwrap().get_flow_type(flow_id);
match flow_type {
Some(FlowType::Batching) => self.batching_engine.flush_flow(flow_id).await,
Some(FlowType::Streaming) => self.streaming_engine.flush_flow(flow_id).await,
None => Ok(0),
None => FlowNotFoundSnafu { id: flow_id }.fail(),
}
}
async fn flow_exist(&self, flow_id: FlowId) -> Result<bool, Error> {
let flow_type = self.src_table2flow.read().await.get_flow_type(flow_id);
let flow_type = self.src_table2flow.read().unwrap().get_flow_type(flow_id);
// not using `flow_type.is_some()`, to make sure the flow actually exists in the underlying engine
match flow_type {
Some(FlowType::Batching) => self.batching_engine.flow_exist(flow_id).await,
@@ -533,13 +175,6 @@ impl FlowEngine for FlowDualEngine {
}
}
async fn list_flows(&self) -> Result<impl IntoIterator<Item = FlowId>, Error> {
let stream_flows = self.streaming_engine.list_flows().await?;
let batch_flows = self.batching_engine.list_flows().await?;
Ok(stream_flows.into_iter().chain(batch_flows))
}
async fn handle_flow_inserts(
&self,
request: api::v1::region::InsertRequests,
@@ -549,7 +184,7 @@ impl FlowEngine for FlowDualEngine {
let mut to_batch_engine = request.requests;
{
let src_table2flow = self.src_table2flow.read().await;
let src_table2flow = self.src_table2flow.read().unwrap();
to_batch_engine.retain(|req| {
let region_id = RegionId::from(req.region_id);
let table_id = region_id.table_id();
@@ -586,7 +221,12 @@ impl FlowEngine for FlowDualEngine {
requests: to_batch_engine,
})
.await?;
stream_handler.await.context(JoinTaskSnafu)??;
stream_handler.await.map_err(|e| {
crate::error::UnexpectedSnafu {
reason: format!("JoinError when handle inserts for flow stream engine: {e:?}"),
}
.build()
})??;
Ok(())
}
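The retain-based routing above splits incoming insert requests by whether their target table feeds a streaming flow; everything else stays with the batching engine. A rough sketch of the same idea with simplified types (table ids as plain u32, not the real InsertRequests):
use std::collections::HashSet;
fn split_by_engine(requests: Vec<u32>, streaming_tables: &HashSet<u32>) -> (Vec<u32>, Vec<u32>) {
    // left: requests routed to the streaming engine, right: the batching engine
    requests
        .into_iter()
        .partition(|table_id| streaming_tables.contains(table_id))
}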
@@ -667,7 +307,14 @@ impl common_meta::node_manager::Flownode for FlowDualEngine {
..Default::default()
})
}
other => common_meta::error::InvalidFlowRequestBodySnafu { body: other }.fail(),
None => UnexpectedSnafu {
err_msg: "Missing request body",
}
.fail(),
_ => UnexpectedSnafu {
err_msg: "Invalid request body.",
}
.fail(),
}
}
@@ -692,7 +339,7 @@ fn to_meta_err(
}
#[async_trait::async_trait]
impl common_meta::node_manager::Flownode for FlowStreamingEngine {
impl common_meta::node_manager::Flownode for FlowWorkerManager {
async fn handle(&self, request: FlowRequest) -> MetaResult<FlowResponse> {
let query_ctx = request
.header
@@ -766,7 +413,14 @@ impl common_meta::node_manager::Flownode for FlowStreamingEngine {
..Default::default()
})
}
other => common_meta::error::InvalidFlowRequestBodySnafu { body: other }.fail(),
None => UnexpectedSnafu {
err_msg: "Missing request body",
}
.fail(),
_ => UnexpectedSnafu {
err_msg: "Invalid request body.",
}
.fail(),
}
}
@@ -778,7 +432,7 @@ impl common_meta::node_manager::Flownode for FlowStreamingEngine {
}
}
impl FlowEngine for FlowStreamingEngine {
impl FlowEngine for FlowWorkerManager {
async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
self.create_flow_inner(args).await
}
@@ -795,16 +449,6 @@ impl FlowEngine for FlowStreamingEngine {
self.flow_exist_inner(flow_id).await
}
async fn list_flows(&self) -> Result<impl IntoIterator<Item = FlowId>, Error> {
Ok(self
.flow_err_collectors
.read()
.await
.keys()
.cloned()
.collect::<Vec<_>>())
}
async fn handle_flow_inserts(
&self,
request: api::v1::region::InsertRequests,
@@ -830,7 +474,7 @@ impl FetchFromRow {
}
}
impl FlowStreamingEngine {
impl FlowWorkerManager {
async fn handle_inserts_inner(
&self,
request: InsertRequests,
@@ -908,7 +552,7 @@ impl FlowStreamingEngine {
.copied()
.map(FetchFromRow::Idx)
.or_else(|| col_default_val.clone().map(FetchFromRow::Default))
.with_context(|| UnexpectedSnafu {
.with_context(|| crate::error::UnexpectedSnafu {
reason: format!(
"Column not found: {}, default_value: {:?}",
col_name, col_default_val

View File

@@ -31,7 +31,7 @@ use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableId;
use crate::adapter::table_source::ManagedTableSource;
use crate::adapter::{FlowId, FlowStreamingEngine, FlowWorkerManagerRef};
use crate::adapter::{FlowId, FlowWorkerManager, FlowWorkerManagerRef};
use crate::error::{FlowNotFoundSnafu, JoinTaskSnafu, UnexpectedSnafu};
use crate::expr::error::ExternalSnafu;
use crate::expr::utils::find_plan_time_window_expr_lower_bound;
@@ -39,7 +39,7 @@ use crate::repr::RelationDesc;
use crate::server::get_all_flow_ids;
use crate::{Error, FrontendInvoker};
impl FlowStreamingEngine {
impl FlowWorkerManager {
/// Create and start refill flow tasks in background
pub async fn create_and_start_refill_flow_tasks(
self: &FlowWorkerManagerRef,

View File

@@ -16,9 +16,9 @@ use std::collections::BTreeMap;
use common_meta::key::flow::flow_state::FlowStat;
use crate::FlowStreamingEngine;
use crate::FlowWorkerManager;
impl FlowStreamingEngine {
impl FlowWorkerManager {
pub async fn gen_state_report(&self) -> FlowStat {
let mut full_report = BTreeMap::new();
let mut last_exec_time_map = BTreeMap::new();

View File

@@ -33,8 +33,8 @@ use crate::adapter::table_source::TableDesc;
use crate::adapter::{TableName, WorkerHandle, AUTO_CREATED_PLACEHOLDER_TS_COL};
use crate::error::{Error, ExternalSnafu, UnexpectedSnafu};
use crate::repr::{ColumnType, RelationDesc, RelationType};
use crate::FlowStreamingEngine;
impl FlowStreamingEngine {
use crate::FlowWorkerManager;
impl FlowWorkerManager {
/// Get a worker handle for creating flow, using round robin to select a worker
pub(crate) async fn get_worker_handle_for_create_flow(&self) -> &WorkerHandle {
let use_idx = {

View File

@@ -17,16 +17,14 @@
use std::collections::{BTreeMap, HashMap};
use std::sync::Arc;
use catalog::CatalogManagerRef;
use common_error::ext::BoxedError;
use common_meta::ddl::create_flow::FlowType;
use common_meta::key::flow::FlowMetadataManagerRef;
use common_meta::key::table_info::{TableInfoManager, TableInfoValue};
use common_meta::key::table_info::TableInfoManager;
use common_meta::key::TableMetadataManagerRef;
use common_runtime::JoinHandle;
use common_telemetry::info;
use common_telemetry::tracing::warn;
use common_telemetry::{debug, info};
use common_time::TimeToLive;
use query::QueryEngineRef;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::RegionId;
@@ -38,9 +36,7 @@ use crate::batching_mode::task::BatchingTask;
use crate::batching_mode::time_window::{find_time_window_expr, TimeWindowExpr};
use crate::batching_mode::utils::sql_to_df_plan;
use crate::engine::FlowEngine;
use crate::error::{
ExternalSnafu, FlowAlreadyExistSnafu, TableNotFoundMetaSnafu, UnexpectedSnafu, UnsupportedSnafu,
};
use crate::error::{ExternalSnafu, FlowAlreadyExistSnafu, TableNotFoundMetaSnafu, UnexpectedSnafu};
use crate::{CreateFlowArgs, Error, FlowId, TableName};
/// Batching mode Engine, responsible for driving all the batching mode tasks
@@ -52,7 +48,6 @@ pub struct BatchingEngine {
frontend_client: Arc<FrontendClient>,
flow_metadata_manager: FlowMetadataManagerRef,
table_meta: TableMetadataManagerRef,
catalog_manager: CatalogManagerRef,
query_engine: QueryEngineRef,
}
@@ -62,7 +57,6 @@ impl BatchingEngine {
query_engine: QueryEngineRef,
flow_metadata_manager: FlowMetadataManagerRef,
table_meta: TableMetadataManagerRef,
catalog_manager: CatalogManagerRef,
) -> Self {
Self {
tasks: Default::default(),
@@ -70,7 +64,6 @@ impl BatchingEngine {
frontend_client,
flow_metadata_manager,
table_meta,
catalog_manager,
query_engine,
}
}
@@ -186,16 +179,6 @@ async fn get_table_name(
table_info: &TableInfoManager,
table_id: &TableId,
) -> Result<TableName, Error> {
get_table_info(table_info, table_id).await.map(|info| {
let name = info.table_name();
[name.catalog_name, name.schema_name, name.table_name]
})
}
async fn get_table_info(
table_info: &TableInfoManager,
table_id: &TableId,
) -> Result<TableInfoValue, Error> {
table_info
.get(*table_id)
.await
@@ -204,7 +187,8 @@ async fn get_table_info(
.with_context(|| UnexpectedSnafu {
reason: format!("Table id = {:?}, couldn't found table name", table_id),
})
.map(|info| info.into_inner())
.map(|name| name.table_name())
.map(|name| [name.catalog_name, name.schema_name, name.table_name])
}
impl BatchingEngine {
@@ -264,19 +248,7 @@ impl BatchingEngine {
let query_ctx = Arc::new(query_ctx);
let mut source_table_names = Vec::with_capacity(2);
for src_id in source_table_ids {
// also check table option to see if ttl!=instant
let table_name = get_table_name(self.table_meta.table_info_manager(), &src_id).await?;
let table_info = get_table_info(self.table_meta.table_info_manager(), &src_id).await?;
if table_info.table_info.meta.options.ttl == Some(TimeToLive::Instant) {
UnsupportedSnafu {
reason: format!(
"Source table `{}`(id={}) has instant TTL, Instant TTL is not supported under batching mode. Consider using a TTL longer than flush interval",
table_name.join("."),
src_id
),
}
.fail()?;
}
source_table_names.push(table_name);
}
@@ -301,14 +273,7 @@ impl BatchingEngine {
})
.transpose()?;
info!(
"Flow id={}, found time window expr={}",
flow_id,
phy_expr
.as_ref()
.map(|phy_expr| phy_expr.to_string())
.unwrap_or("None".to_string())
);
info!("Flow id={}, found time window expr={:?}", flow_id, phy_expr);
let task = BatchingTask::new(
flow_id,
@@ -319,7 +284,7 @@ impl BatchingEngine {
sink_table_name,
source_table_names,
query_ctx,
self.catalog_manager.clone(),
self.table_meta.clone(),
rx,
);
@@ -330,11 +295,10 @@ impl BatchingEngine {
// check execute once first to detect any error early
task.check_execute(&engine, &frontend).await?;
// TODO(discord9): use a time wheel or similar for better scheduling
let handle = common_runtime::spawn_global(async move {
// TODO(discord9): also save the handle & use a time wheel or similar for better scheduling
let _handle = common_runtime::spawn_global(async move {
task_inner.start_executing_loop(engine, frontend).await;
});
task.state.write().unwrap().task_handle = Some(handle);
// only replace here, not earlier, because we want the old one intact if something goes wrong before this line
let replaced_old_task_opt = self.tasks.write().await.insert(flow_id, task);
@@ -362,23 +326,15 @@ impl BatchingEngine {
}
pub async fn flush_flow_inner(&self, flow_id: FlowId) -> Result<usize, Error> {
debug!("Try flush flow {flow_id}");
let task = self.tasks.read().await.get(&flow_id).cloned();
let task = task.with_context(|| UnexpectedSnafu {
reason: format!("Can't found task for flow {flow_id}"),
})?;
task.mark_all_windows_as_dirty()?;
let res = task
.gen_exec_once(&self.query_engine, &self.frontend_client)
.await?;
let affected_rows = res.map(|(r, _)| r).unwrap_or_default() as usize;
debug!(
"Successfully flush flow {flow_id}, affected rows={}",
affected_rows
);
Ok(affected_rows)
}
@@ -401,9 +357,6 @@ impl FlowEngine for BatchingEngine {
async fn flow_exist(&self, flow_id: FlowId) -> Result<bool, Error> {
Ok(self.flow_exist_inner(flow_id).await)
}
async fn list_flows(&self) -> Result<impl IntoIterator<Item = FlowId>, Error> {
Ok(self.tasks.read().await.keys().cloned().collect::<Vec<_>>())
}
async fn handle_flow_inserts(
&self,
request: api::v1::region::InsertRequests,

View File

@@ -14,105 +14,44 @@
//! Frontend client to run a flow as a batching task, i.e. a time-window-aware normal query triggered on every tick set by the user
use std::sync::{Arc, Weak};
use std::sync::Arc;
use api::v1::greptime_request::Request;
use api::v1::CreateTableExpr;
use client::{Client, Database};
use common_error::ext::{BoxedError, ErrorExt};
use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::ext::BoxedError;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::cluster::{NodeInfo, NodeInfoKey, Role};
use common_meta::peer::Peer;
use common_meta::rpc::store::RangeRequest;
use common_query::Output;
use meta_client::client::MetaClient;
use servers::query_handler::grpc::GrpcQueryHandler;
use session::context::{QueryContextBuilder, QueryContextRef};
use snafu::{OptionExt, ResultExt};
use snafu::ResultExt;
use crate::batching_mode::DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT;
use crate::error::{ExternalSnafu, InvalidRequestSnafu, UnexpectedSnafu};
use crate::error::{ExternalSnafu, UnexpectedSnafu};
use crate::Error;
/// Just like [`GrpcQueryHandler`] but uses BoxedError
///
/// basically just a specialized `GrpcQueryHandler<Error=BoxedError>`
///
/// this is only useful for flownode to
/// invoke frontend Instance in standalone mode
#[async_trait::async_trait]
pub trait GrpcQueryHandlerWithBoxedError: Send + Sync + 'static {
async fn do_query(
&self,
query: Request,
ctx: QueryContextRef,
) -> std::result::Result<Output, BoxedError>;
fn default_channel_mgr() -> ChannelManager {
let cfg = ChannelConfig::new().timeout(DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT);
ChannelManager::with_config(cfg)
}
/// auto impl
#[async_trait::async_trait]
impl<
E: ErrorExt + Send + Sync + 'static,
T: GrpcQueryHandler<Error = E> + Send + Sync + 'static,
> GrpcQueryHandlerWithBoxedError for T
{
async fn do_query(
&self,
query: Request,
ctx: QueryContextRef,
) -> std::result::Result<Output, BoxedError> {
self.do_query(query, ctx).await.map_err(BoxedError::new)
}
fn client_from_urls(addrs: Vec<String>) -> Client {
Client::with_manager_and_urls(default_channel_mgr(), addrs)
}
type HandlerMutable = Arc<std::sync::Mutex<Option<Weak<dyn GrpcQueryHandlerWithBoxedError>>>>;
/// A simple frontend client able to execute sql using grpc protocol
///
/// This is for computation-heavy queries which need to offload computation to the frontend, lifting the load from the flownode
#[derive(Debug, Clone)]
#[derive(Debug)]
pub enum FrontendClient {
Distributed {
meta_client: Arc<MetaClient>,
chnl_mgr: ChannelManager,
},
Standalone {
/// for the sake of simplicity, still use grpc even in standalone mode
/// note that the clients here should all be lazy, so that they can wait until the frontend is booted before making a connection
/// TODO(discord9): don't use grpc under standalone mode
database_client: HandlerMutable,
database_client: DatabaseWithPeer,
},
}
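A minimal sketch of the "lazy" standalone handler idea noted in the comment above: hold a Weak reference behind a mutex and only upgrade it at call time, so the client can be wired up before the frontend instance actually exists. Names here are illustrative, not from this diff:
use std::sync::{Arc, Mutex, Weak};
struct LazyHandler<T: ?Sized>(Mutex<Option<Weak<T>>>);
impl<T: ?Sized> LazyHandler<T> {
    fn set(&self, handler: &Arc<T>) {
        *self.0.lock().unwrap() = Some(Arc::downgrade(handler));
    }
    fn get(&self) -> Option<Arc<T>> {
        // None if not set yet, or if the frontend has already been dropped
        let guard = self.0.lock().ok()?;
        guard.as_ref()?.upgrade()
    }
}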
impl FrontendClient {
/// Create a new empty frontend client, with a `HandlerMutable` to set the grpc handler later
pub fn from_empty_grpc_handler() -> (Self, HandlerMutable) {
let handler = Arc::new(std::sync::Mutex::new(None));
(
Self::Standalone {
database_client: handler.clone(),
},
handler,
)
}
pub fn from_meta_client(meta_client: Arc<MetaClient>) -> Self {
Self::Distributed {
meta_client,
chnl_mgr: {
let cfg = ChannelConfig::new().timeout(DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT);
ChannelManager::with_config(cfg)
},
}
}
pub fn from_grpc_handler(grpc_handler: Weak<dyn GrpcQueryHandlerWithBoxedError>) -> Self {
Self::Standalone {
database_client: Arc::new(std::sync::Mutex::new(Some(grpc_handler))),
}
}
}
#[derive(Debug, Clone)]
pub struct DatabaseWithPeer {
pub database: Database,
@@ -125,6 +64,25 @@ impl DatabaseWithPeer {
}
}
impl FrontendClient {
pub fn from_meta_client(meta_client: Arc<MetaClient>) -> Self {
Self::Distributed { meta_client }
}
pub fn from_static_grpc_addr(addr: String) -> Self {
let peer = Peer {
id: 0,
addr: addr.clone(),
};
let client = client_from_urls(vec![addr]);
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
Self::Standalone {
database_client: DatabaseWithPeer::new(database, peer),
}
}
}
impl FrontendClient {
async fn scan_for_frontend(&self) -> Result<Vec<(NodeInfoKey, NodeInfo)>, Error> {
let Self::Distributed { meta_client, .. } = self else {
@@ -157,21 +115,10 @@ impl FrontendClient {
}
/// Get the database with max `last_activity_ts`
async fn get_last_active_frontend(
&self,
catalog: &str,
schema: &str,
) -> Result<DatabaseWithPeer, Error> {
let Self::Distributed {
meta_client: _,
chnl_mgr,
} = self
else {
return UnexpectedSnafu {
reason: "Expect distributed mode",
}
.fail();
};
async fn get_last_active_frontend(&self) -> Result<DatabaseWithPeer, Error> {
if let Self::Standalone { database_client } = self {
return Ok(database_client.clone());
}
let frontends = self.scan_for_frontend().await?;
let mut peer = None;
@@ -186,119 +133,16 @@ impl FrontendClient {
}
.fail()?
};
let client = Client::with_manager_and_urls(chnl_mgr.clone(), vec![peer.addr.clone()]);
let database = Database::new(catalog, schema, client);
let client = client_from_urls(vec![peer.addr.clone()]);
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
Ok(DatabaseWithPeer::new(database, peer))
}
pub async fn create(
&self,
create: CreateTableExpr,
catalog: &str,
schema: &str,
) -> Result<u32, Error> {
self.handle(
Request::Ddl(api::v1::DdlRequest {
expr: Some(api::v1::ddl_request::Expr::CreateTable(create)),
}),
catalog,
schema,
&mut None,
)
.await
}
/// Handle a request to frontend
pub(crate) async fn handle(
&self,
req: api::v1::greptime_request::Request,
catalog: &str,
schema: &str,
peer_desc: &mut Option<PeerDesc>,
) -> Result<u32, Error> {
/// Get a database client, and possibly update it before returning.
pub async fn get_database_client(&self) -> Result<DatabaseWithPeer, Error> {
match self {
FrontendClient::Distributed { .. } => {
let db = self.get_last_active_frontend(catalog, schema).await?;
*peer_desc = Some(PeerDesc::Dist {
peer: db.peer.clone(),
});
db.database
.handle(req.clone())
.await
.with_context(|_| InvalidRequestSnafu {
context: format!("Failed to handle request: {:?}", req),
})
}
FrontendClient::Standalone { database_client } => {
let ctx = QueryContextBuilder::default()
.current_catalog(catalog.to_string())
.current_schema(schema.to_string())
.build();
let ctx = Arc::new(ctx);
{
let database_client = {
database_client
.lock()
.map_err(|e| {
UnexpectedSnafu {
reason: format!("Failed to lock database client: {e}"),
}
.build()
})?
.as_ref()
.context(UnexpectedSnafu {
reason: "Standalone's frontend instance is not set",
})?
.upgrade()
.context(UnexpectedSnafu {
reason: "Failed to upgrade database client",
})?
};
let resp: common_query::Output = database_client
.do_query(req.clone(), ctx)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
match resp.data {
common_query::OutputData::AffectedRows(rows) => {
Ok(rows.try_into().map_err(|_| {
UnexpectedSnafu {
reason: format!("Failed to convert rows to u32: {}", rows),
}
.build()
})?)
}
_ => UnexpectedSnafu {
reason: "Unexpected output data",
}
.fail(),
}
}
}
}
}
}
/// Describe a peer of frontend
#[derive(Debug, Default)]
pub(crate) enum PeerDesc {
/// Distributed mode's frontend peer address
Dist {
/// frontend peer address
peer: Peer,
},
/// Standalone mode
#[default]
Standalone,
}
impl std::fmt::Display for PeerDesc {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
PeerDesc::Dist { peer } => write!(f, "{}", peer.addr),
PeerDesc::Standalone => write!(f, "standalone"),
Self::Standalone { database_client } => Ok(database_client.clone()),
Self::Distributed { meta_client: _ } => self.get_last_active_frontend().await,
}
}
}

View File

@@ -22,14 +22,13 @@ use common_telemetry::tracing::warn;
use common_time::Timestamp;
use datatypes::value::Value;
use session::context::QueryContextRef;
use snafu::{OptionExt, ResultExt};
use snafu::ResultExt;
use tokio::sync::oneshot;
use tokio::time::Instant;
use crate::batching_mode::task::BatchingTask;
use crate::batching_mode::time_window::TimeWindowExpr;
use crate::batching_mode::MIN_REFRESH_DURATION;
use crate::error::{DatatypesSnafu, InternalSnafu, TimeSnafu, UnexpectedSnafu};
use crate::error::{DatatypesSnafu, InternalSnafu, TimeSnafu};
use crate::{Error, FlowId};
/// The state of the [`BatchingTask`].
@@ -47,8 +46,6 @@ pub struct TaskState {
exec_state: ExecState,
/// Shutdown receiver
pub(crate) shutdown_rx: oneshot::Receiver<()>,
/// Task handle
pub(crate) task_handle: Option<tokio::task::JoinHandle<()>>,
}
impl TaskState {
pub fn new(query_ctx: QueryContextRef, shutdown_rx: oneshot::Receiver<()>) -> Self {
@@ -59,7 +56,6 @@ impl TaskState {
dirty_time_windows: Default::default(),
exec_state: ExecState::Idle,
shutdown_rx,
task_handle: None,
}
}
@@ -74,11 +70,7 @@ impl TaskState {
/// wait for at least `last_query_duration`, and at most `max_timeout`, before starting the next query
///
/// if there are more dirty time windows, execute the next query immediately
pub fn get_next_start_query_time(
&self,
flow_id: FlowId,
max_timeout: Option<Duration>,
) -> Instant {
pub fn get_next_start_query_time(&self, max_timeout: Option<Duration>) -> Instant {
let next_duration = max_timeout
.unwrap_or(self.last_query_duration)
.min(self.last_query_duration);
@@ -88,12 +80,6 @@ impl TaskState {
if self.dirty_time_windows.windows.is_empty() {
self.last_update_time + next_duration
} else {
debug!(
"Flow id = {}, still have {} dirty time window({:?}), execute immediately",
flow_id,
self.dirty_time_windows.windows.len(),
self.dirty_time_windows.windows
);
Instant::now()
}
}
@@ -129,15 +115,6 @@ impl DirtyTimeWindows {
}
}
pub fn add_window(&mut self, start: Timestamp, end: Option<Timestamp>) {
self.windows.insert(start, end);
}
/// Clean all dirty time windows, useful when the time window expr can't be found
pub fn clean(&mut self) {
self.windows.clear();
}
/// Generate all filter expressions consuming all time windows
pub fn gen_filter_exprs(
&mut self,
@@ -200,18 +177,6 @@ impl DirtyTimeWindows {
let mut expr_lst = vec![];
for (start, end) in first_nth.into_iter() {
// align using time window exprs
let (start, end) = if let Some(ctx) = task_ctx {
let Some(time_window_expr) = &ctx.config.time_window_expr else {
UnexpectedSnafu {
reason: "time_window_expr is not set",
}
.fail()?
};
self.align_time_window(start, end, time_window_expr)?
} else {
(start, end)
};
debug!(
"Time window start: {:?}, end: {:?}",
start.to_iso8601_string(),
@@ -234,30 +199,6 @@ impl DirtyTimeWindows {
Ok(expr)
}
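Hedged illustration of what the generated filter amounts to: each dirty window becomes a half-open range predicate on the time-index column, and the windows are OR-ed together (the real expressions are built as DataFusion `Expr`s and unparsed to SQL; the literal formatting below is simplified):
fn windows_to_filter(col: &str, windows: &[(i64, i64)]) -> String {
    windows
        .iter()
        .map(|(start, end)| format!("({col} >= {start} AND {col} < {end})"))
        .collect::<Vec<_>>()
        .join(" OR ")
}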
fn align_time_window(
&self,
start: Timestamp,
end: Option<Timestamp>,
time_window_expr: &TimeWindowExpr,
) -> Result<(Timestamp, Option<Timestamp>), Error> {
let align_start = time_window_expr.eval(start)?.0.context(UnexpectedSnafu {
reason: format!(
"Failed to align start time {:?} with time window expr {:?}",
start, time_window_expr
),
})?;
let align_end = end
.and_then(|end| {
time_window_expr
.eval(end)
// if the end is the same after aligning, use end (because it's already aligned), else use the aligned end
.map(|r| if r.0 == Some(end) { r.0 } else { r.1 })
.transpose()
})
.transpose()?;
Ok((align_start, align_end))
}
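A worked illustration of the alignment just above, using a 5-second window like the `test_align_time_window` cases further down: the start floors to its window start, and the end rounds up to the next boundary unless it is already aligned.
fn align_5s(start_s: i64, end_s: Option<i64>) -> (i64, Option<i64>) {
    let floor = |t: i64| t - t.rem_euclid(5);
    let aligned_start = floor(start_s);
    let aligned_end = end_s.map(|e| if e.rem_euclid(5) == 0 { e } else { floor(e) + 5 });
    (aligned_start, aligned_end)
}
// align_5s(3, None)     == (0, None)
// align_5s(8, Some(9))  == (5, Some(10))
// align_5s(8, Some(10)) == (5, Some(10))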
/// Merge time windows that overlap or get too close
pub fn merge_dirty_time_windows(
&mut self,
@@ -346,12 +287,8 @@ enum ExecState {
#[cfg(test)]
mod test {
use pretty_assertions::assert_eq;
use session::context::QueryContext;
use super::*;
use crate::batching_mode::time_window::find_time_window_expr;
use crate::batching_mode::utils::sql_to_df_plan;
use crate::test_utils::create_test_query_engine;
#[test]
fn test_merge_dirty_time_windows() {
@@ -467,59 +404,4 @@ mod test {
assert_eq!(expected_filter_expr, to_sql.as_deref());
}
}
#[tokio::test]
async fn test_align_time_window() {
type TimeWindow = (Timestamp, Option<Timestamp>);
struct TestCase {
sql: String,
aligns: Vec<(TimeWindow, TimeWindow)>,
}
let testcases: Vec<TestCase> = vec![TestCase{
sql: "SELECT date_bin(INTERVAL '5 second', ts) AS time_window FROM numbers_with_ts GROUP BY time_window;".to_string(),
aligns: vec![
((Timestamp::new_second(3), None), (Timestamp::new_second(0), None)),
((Timestamp::new_second(8), None), (Timestamp::new_second(5), None)),
((Timestamp::new_second(8), Some(Timestamp::new_second(10))), (Timestamp::new_second(5), Some(Timestamp::new_second(10)))),
((Timestamp::new_second(8), Some(Timestamp::new_second(9))), (Timestamp::new_second(5), Some(Timestamp::new_second(10)))),
],
}];
let query_engine = create_test_query_engine();
let ctx = QueryContext::arc();
for TestCase { sql, aligns } in testcases {
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), &sql, true)
.await
.unwrap();
let (column_name, time_window_expr, _, df_schema) = find_time_window_expr(
&plan,
query_engine.engine_state().catalog_manager().clone(),
ctx.clone(),
)
.await
.unwrap();
let time_window_expr = time_window_expr
.map(|expr| {
TimeWindowExpr::from_expr(
&expr,
&column_name,
&df_schema,
&query_engine.engine_state().session_state(),
)
})
.transpose()
.unwrap()
.unwrap();
let dirty = DirtyTimeWindows::default();
for (before_align, expected_after_align) in aligns {
let after_align = dirty
.align_time_window(before_align.0, before_align.1, &time_window_expr)
.unwrap();
assert_eq!(expected_after_align, after_align);
}
}
}
}

View File

@@ -12,32 +12,33 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{BTreeSet, HashSet};
use std::collections::HashSet;
use std::ops::Deref;
use std::sync::{Arc, RwLock};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use api::v1::CreateTableExpr;
use arrow_schema::Fields;
use catalog::CatalogManagerRef;
use common_error::ext::BoxedError;
use common_query::logical_plan::breakup_insert_plan;
use common_meta::key::table_name::TableNameKey;
use common_meta::key::TableMetadataManagerRef;
use common_telemetry::tracing::warn;
use common_telemetry::{debug, info};
use common_time::Timestamp;
use datafusion::optimizer::analyzer::count_wildcard_rule::CountWildcardRule;
use datafusion::optimizer::AnalyzerRule;
use datafusion::sql::unparser::expr_to_sql;
use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::tree_node::TreeNode;
use datafusion_expr::{DmlStatement, LogicalPlan, WriteOp};
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::schema::constraint::NOW_FN;
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
use datatypes::value::Value;
use operator::expr_helper::column_schemas_to_defs;
use query::query_engine::DefaultSerializer;
use query::QueryEngineRef;
use session::context::QueryContextRef;
use snafu::{OptionExt, ResultExt};
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use table::metadata::RawTableMeta;
use tokio::sync::oneshot;
use tokio::sync::oneshot::error::TryRecvError;
use tokio::time::Instant;
@@ -47,15 +48,14 @@ use crate::batching_mode::frontend_client::FrontendClient;
use crate::batching_mode::state::TaskState;
use crate::batching_mode::time_window::TimeWindowExpr;
use crate::batching_mode::utils::{
get_table_info_df_schema, sql_to_df_plan, AddAutoColumnRewriter, AddFilterRewriter,
FindGroupByFinalName,
sql_to_df_plan, AddAutoColumnRewriter, AddFilterRewriter, FindGroupByFinalName,
};
use crate::batching_mode::{
DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT, MIN_REFRESH_DURATION, SLOW_QUERY_THRESHOLD,
};
use crate::error::{
ConvertColumnSchemaSnafu, DatafusionSnafu, ExternalSnafu, InvalidQuerySnafu,
SubstraitEncodeLogicalPlanSnafu, UnexpectedSnafu,
ConvertColumnSchemaSnafu, DatafusionSnafu, DatatypesSnafu, ExternalSnafu, InvalidRequestSnafu,
SubstraitEncodeLogicalPlanSnafu, TableNotFoundMetaSnafu, TableNotFoundSnafu, UnexpectedSnafu,
};
use crate::metrics::{
METRIC_FLOW_BATCHING_ENGINE_QUERY_TIME, METRIC_FLOW_BATCHING_ENGINE_SLOW_QUERY,
@@ -73,7 +73,7 @@ pub struct TaskConfig {
pub expire_after: Option<i64>,
sink_table_name: [String; 3],
pub source_table_names: HashSet<[String; 3]>,
catalog_manager: CatalogManagerRef,
table_meta: TableMetadataManagerRef,
}
#[derive(Clone)]
@@ -93,7 +93,7 @@ impl BatchingTask {
sink_table_name: [String; 3],
source_table_names: Vec<[String; 3]>,
query_ctx: QueryContextRef,
catalog_manager: CatalogManagerRef,
table_meta: TableMetadataManagerRef,
shutdown_rx: oneshot::Receiver<()>,
) -> Self {
Self {
@@ -105,42 +105,12 @@ impl BatchingTask {
expire_after,
sink_table_name,
source_table_names: source_table_names.into_iter().collect(),
catalog_manager,
table_meta,
}),
state: Arc::new(RwLock::new(TaskState::new(query_ctx, shutdown_rx))),
}
}
/// Mark the time window range (now - expire_after, now) as dirty (or (0, now) if expire_after is not set)
///
/// Useful for flush_flow to flush the dirty time window range
pub fn mark_all_windows_as_dirty(&self) -> Result<(), Error> {
let now = SystemTime::now();
let now = Timestamp::new_second(
now.duration_since(UNIX_EPOCH)
.expect("Time went backwards")
.as_secs() as _,
);
let lower_bound = self
.config
.expire_after
.map(|e| now.sub_duration(Duration::from_secs(e as _)))
.transpose()
.map_err(BoxedError::new)
.context(ExternalSnafu)?
.unwrap_or(Timestamp::new_second(0));
debug!(
"Flow {} mark range ({:?}, {:?}) as dirty",
self.config.flow_id, lower_bound, now
);
self.state
.write()
.unwrap()
.dirty_time_windows
.add_window(lower_bound, Some(now));
Ok(())
}
/// Test execute, for checking syntax and the like
pub async fn check_execute(
&self,
@@ -178,8 +148,13 @@ impl BatchingTask {
async fn is_table_exist(&self, table_name: &[String; 3]) -> Result<bool, Error> {
self.config
.catalog_manager
.table_exists(&table_name[0], &table_name[1], &table_name[2], None)
.table_meta
.table_name_manager()
.exists(TableNameKey {
catalog: &table_name[0],
schema: &table_name[1],
table: &table_name[2],
})
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)
@@ -191,10 +166,8 @@ impl BatchingTask {
frontend_client: &Arc<FrontendClient>,
) -> Result<Option<(u32, Duration)>, Error> {
if let Some(new_query) = self.gen_insert_plan(engine).await? {
debug!("Generate new query: {:#?}", new_query);
self.execute_logical_plan(frontend_client, &new_query).await
} else {
debug!("Generate no query");
Ok(None)
}
}
@@ -203,35 +176,67 @@ impl BatchingTask {
&self,
engine: &QueryEngineRef,
) -> Result<Option<LogicalPlan>, Error> {
let (table, df_schema) = get_table_info_df_schema(
self.config.catalog_manager.clone(),
self.config.sink_table_name.clone(),
)
.await?;
let full_table_name = self.config.sink_table_name.clone().join(".");
let table_id = self
.config
.table_meta
.table_name_manager()
.get(common_meta::key::table_name::TableNameKey::new(
&self.config.sink_table_name[0],
&self.config.sink_table_name[1],
&self.config.sink_table_name[2],
))
.await
.with_context(|_| TableNotFoundMetaSnafu {
msg: full_table_name.clone(),
})?
.map(|t| t.table_id())
.with_context(|| TableNotFoundSnafu {
name: full_table_name.clone(),
})?;
let table = self
.config
.table_meta
.table_info_manager()
.get(table_id)
.await
.with_context(|_| TableNotFoundMetaSnafu {
msg: full_table_name.clone(),
})?
.with_context(|| TableNotFoundSnafu {
name: full_table_name.clone(),
})?
.into_inner();
let schema: datatypes::schema::Schema = table
.table_info
.meta
.schema
.clone()
.try_into()
.with_context(|_| DatatypesSnafu {
extra: format!(
"Failed to convert schema from raw schema, raw_schema={:?}",
table.table_info.meta.schema
),
})?;
let df_schema = Arc::new(schema.arrow_schema().clone().try_into().with_context(|_| {
DatafusionSnafu {
context: format!(
"Failed to convert arrow schema to datafusion schema, arrow_schema={:?}",
schema.arrow_schema()
),
}
})?);
let new_query = self
.gen_query_with_time_window(engine.clone(), &table.meta.schema)
.gen_query_with_time_window(engine.clone(), &table.table_info.meta)
.await?;
let insert_into = if let Some((new_query, _column_cnt)) = new_query {
// first check that all columns in the input query exist in the sink table,
// since INSERT INTO refers to names in the record batch generated by the given query
let table_columns = df_schema
.columns()
.into_iter()
.map(|c| c.name)
.collect::<BTreeSet<_>>();
for column in new_query.schema().columns() {
if !table_columns.contains(column.name()) {
return InvalidQuerySnafu {
reason: format!(
"Column {} not found in sink table with columns {:?}",
column, table_columns
),
}
.fail();
}
}
// update_at & time index placeholder (if it exists) should have default values
LogicalPlan::Dml(DmlStatement::new(
datafusion_common::TableReference::Full {
@@ -246,9 +251,6 @@ impl BatchingTask {
} else {
return Ok(None);
};
let insert_into = insert_into.recompute_schema().context(DatafusionSnafu {
context: "Failed to recompute schema",
})?;
Ok(Some(insert_into))
}
@@ -257,11 +259,14 @@ impl BatchingTask {
frontend_client: &Arc<FrontendClient>,
expr: CreateTableExpr,
) -> Result<(), Error> {
let catalog = &self.config.sink_table_name[0];
let schema = &self.config.sink_table_name[1];
frontend_client
.create(expr.clone(), catalog, schema)
.await?;
let db_client = frontend_client.get_database_client().await?;
db_client
.database
.create(expr.clone())
.await
.with_context(|_| InvalidRequestSnafu {
context: format!("Failed to create table with expr: {:?}", expr),
})?;
Ok(())
}
@@ -272,78 +277,27 @@ impl BatchingTask {
) -> Result<Option<(u32, Duration)>, Error> {
let instant = Instant::now();
let flow_id = self.config.flow_id;
let db_client = frontend_client.get_database_client().await?;
let peer_addr = db_client.peer.addr;
debug!(
"Executing flow {flow_id}(expire_after={:?} secs) with query {}",
self.config.expire_after, &plan
"Executing flow {flow_id}(expire_after={:?} secs) on {:?} with query {}",
self.config.expire_after, peer_addr, &plan
);
let catalog = &self.config.sink_table_name[0];
let schema = &self.config.sink_table_name[1];
let timer = METRIC_FLOW_BATCHING_ENGINE_QUERY_TIME
.with_label_values(&[flow_id.to_string().as_str()])
.start_timer();
// fix all table refs by making them fully qualified, i.e. "table_name" => "catalog_name.schema_name.table_name"
let fixed_plan = plan
.clone()
.transform_down_with_subqueries(|p| {
if let LogicalPlan::TableScan(mut table_scan) = p {
let resolved = table_scan.table_name.resolve(catalog, schema);
table_scan.table_name = resolved.into();
Ok(Transformed::yes(LogicalPlan::TableScan(table_scan)))
} else {
Ok(Transformed::no(p))
}
})
.with_context(|_| DatafusionSnafu {
context: format!("Failed to fix table ref in logical plan, plan={:?}", plan),
})?
.data;
let message = DFLogicalSubstraitConvertor {}
.encode(plan, DefaultSerializer)
.context(SubstraitEncodeLogicalPlanSnafu)?;
let expanded_plan = CountWildcardRule::new()
.analyze(fixed_plan.clone(), &Default::default())
.with_context(|_| DatafusionSnafu {
context: format!(
"Failed to expand wildcard in logical plan, plan={:?}",
fixed_plan
),
})?;
let req = api::v1::greptime_request::Request::Query(api::v1::QueryRequest {
query: Some(api::v1::query_request::Query::LogicalPlan(message.to_vec())),
});
let plan = expanded_plan;
let mut peer_desc = None;
let res = {
let _timer = METRIC_FLOW_BATCHING_ENGINE_QUERY_TIME
.with_label_values(&[flow_id.to_string().as_str()])
.start_timer();
// hack: special handling for the insert logical plan
let req = if let Some((insert_to, insert_plan)) =
breakup_insert_plan(&plan, catalog, schema)
{
let message = DFLogicalSubstraitConvertor {}
.encode(&insert_plan, DefaultSerializer)
.context(SubstraitEncodeLogicalPlanSnafu)?;
api::v1::greptime_request::Request::Query(api::v1::QueryRequest {
query: Some(api::v1::query_request::Query::InsertIntoPlan(
api::v1::InsertIntoPlan {
table_name: Some(insert_to),
logical_plan: message.to_vec(),
},
)),
})
} else {
let message = DFLogicalSubstraitConvertor {}
.encode(&plan, DefaultSerializer)
.context(SubstraitEncodeLogicalPlanSnafu)?;
api::v1::greptime_request::Request::Query(api::v1::QueryRequest {
query: Some(api::v1::query_request::Query::LogicalPlan(message.to_vec())),
})
};
frontend_client
.handle(req, catalog, schema, &mut peer_desc)
.await
};
let res = db_client.database.handle(req).await;
drop(timer);
let elapsed = instant.elapsed();
if let Ok(affected_rows) = &res {
@@ -353,23 +307,19 @@ impl BatchingTask {
);
} else if let Err(err) = &res {
warn!(
"Failed to execute Flow {flow_id} on frontend {:?}, result: {err:?}, elapsed: {:?} with query: {}",
peer_desc, elapsed, &plan
"Failed to execute Flow {flow_id} on frontend {}, result: {err:?}, elapsed: {:?} with query: {}",
peer_addr, elapsed, &plan
);
}
// record slow query
if elapsed >= SLOW_QUERY_THRESHOLD {
warn!(
"Flow {flow_id} on frontend {:?} executed for {:?} before complete, query: {}",
peer_desc, elapsed, &plan
"Flow {flow_id} on frontend {} executed for {:?} before complete, query: {}",
peer_addr, elapsed, &plan
);
METRIC_FLOW_BATCHING_ENGINE_SLOW_QUERY
.with_label_values(&[
flow_id.to_string().as_str(),
&plan.to_string(),
&peer_desc.unwrap_or_default().to_string(),
])
.with_label_values(&[flow_id.to_string().as_str(), &plan.to_string(), &peer_addr])
.observe(elapsed.as_secs_f64());
}
@@ -378,7 +328,12 @@ impl BatchingTask {
.unwrap()
.after_query_exec(elapsed, res.is_ok());
let res = res?;
let res = res.context(InvalidRequestSnafu {
context: format!(
"Failed to execute query for flow={}: \'{}\'",
self.config.flow_id, &plan
),
})?;
Ok(Some((res, elapsed)))
}
@@ -417,10 +372,7 @@ impl BatchingTask {
}
Err(TryRecvError::Empty) => (),
}
state.get_next_start_query_time(
self.config.flow_id,
Some(DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT),
)
state.get_next_start_query_time(Some(DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT))
};
tokio::time::sleep_until(sleep_until).await;
}
@@ -434,18 +386,14 @@ impl BatchingTask {
continue;
}
// TODO(discord9): this error should have a better place to go, but for now just print it; also more context is needed
Err(err) => {
match new_query {
Some(query) => {
common_telemetry::error!(err; "Failed to execute query for flow={} with query: {query}", self.config.flow_id)
}
None => {
common_telemetry::error!(err; "Failed to generate query for flow={}", self.config.flow_id)
}
Err(err) => match new_query {
Some(query) => {
common_telemetry::error!(err; "Failed to execute query for flow={} with query: {query}", self.config.flow_id)
}
// also sleep for a little while before try again to prevent flooding logs
tokio::time::sleep(MIN_REFRESH_DURATION).await;
}
None => {
common_telemetry::error!(err; "Failed to generate query for flow={}", self.config.flow_id)
}
},
}
}
}
@@ -470,7 +418,7 @@ impl BatchingTask {
async fn gen_query_with_time_window(
&self,
engine: QueryEngineRef,
sink_table_schema: &Arc<Schema>,
sink_table_meta: &RawTableMeta,
) -> Result<Option<(LogicalPlan, usize)>, Error> {
let query_ctx = self.state.read().unwrap().query_ctx.clone();
let start = SystemTime::now();
@@ -529,11 +477,9 @@ impl BatchingTask {
debug!(
"Flow id = {:?}, can't get window size: precise_lower_bound={expire_time_window_bound:?}, using the same query", self.config.flow_id
);
// clean dirty time window too, this could be from create flow's check_execute
self.state.write().unwrap().dirty_time_windows.clean();
let mut add_auto_column =
AddAutoColumnRewriter::new(sink_table_schema.clone());
AddAutoColumnRewriter::new(sink_table_meta.schema.clone());
let plan = self
.config
.plan
@@ -569,10 +515,8 @@ impl BatchingTask {
return Ok(None);
};
// TODO(discord9): add auto column or not? This might break compatibility for auto created sink table before this, but that's ok right?
let mut add_filter = AddFilterRewriter::new(expr);
let mut add_auto_column = AddAutoColumnRewriter::new(sink_table_schema.clone());
let mut add_auto_column = AddAutoColumnRewriter::new(sink_table_meta.schema.clone());
// make a not optimized plan for clearer unparse
let plan = sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.config.query, false)
.await?;
@@ -590,7 +534,7 @@ impl BatchingTask {
}
// auto-created tables have an auto-added column `update_at`, and optionally a `AUTO_CREATED_PLACEHOLDER_TS_COL` column as a time index placeholder if no timestamp column is specified
// TODO(discord9): for now no default value is set for the auto-added column, for compatibility with streaming mode, but this might change in favor of simpler code?
// TODO(discord9): unit test
fn create_table_with_expr(
plan: &LogicalPlan,
sink_table_name: &[String; 3],
@@ -614,7 +558,11 @@ fn create_table_with_expr(
AUTO_CREATED_UPDATE_AT_TS_COL,
ConcreteDataType::timestamp_millisecond_datatype(),
true,
);
)
.with_default_constraint(Some(ColumnDefaultConstraint::Function(NOW_FN.to_string())))
.context(DatatypesSnafu {
extra: "Failed to build column `update_at TimestampMillisecond default now()`",
})?;
column_schemas.push(update_at_schema);
let time_index = if let Some(time_index) = first_time_stamp {
@@ -626,7 +574,16 @@ fn create_table_with_expr(
ConcreteDataType::timestamp_millisecond_datatype(),
false,
)
.with_time_index(true),
.with_time_index(true)
.with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::Timestamp(
Timestamp::new_millisecond(0),
))))
.context(DatatypesSnafu {
extra: format!(
"Failed to build column `{} TimestampMillisecond TIME INDEX default 0`",
AUTO_CREATED_PLACEHOLDER_TS_COL
),
})?,
);
AUTO_CREATED_PLACEHOLDER_TS_COL.to_string()
};
@@ -718,14 +675,20 @@ mod test {
AUTO_CREATED_UPDATE_AT_TS_COL,
ConcreteDataType::timestamp_millisecond_datatype(),
true,
);
)
.with_default_constraint(Some(ColumnDefaultConstraint::Function(NOW_FN.to_string())))
.unwrap();
let ts_placeholder_schema = ColumnSchema::new(
AUTO_CREATED_PLACEHOLDER_TS_COL,
ConcreteDataType::timestamp_millisecond_datatype(),
false,
)
.with_time_index(true);
.with_time_index(true)
.with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::Timestamp(
Timestamp::new_millisecond(0),
))))
.unwrap();
let testcases = vec![
TestCase {

View File

@@ -72,17 +72,6 @@ pub struct TimeWindowExpr {
df_schema: DFSchema,
}
impl std::fmt::Display for TimeWindowExpr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("TimeWindowExpr")
.field("phy_expr", &self.phy_expr.to_string())
.field("column_name", &self.column_name)
.field("logical_expr", &self.logical_expr.to_string())
.field("df_schema", &self.df_schema)
.finish()
}
}
impl TimeWindowExpr {
pub fn from_expr(
expr: &Expr,
@@ -267,7 +256,7 @@ fn columnar_to_ts_vector(columnar: &ColumnarValue) -> Result<Vec<Option<Timestam
Ok(val)
}
/// Return (`the column name of time index column`, `the time window expr`, `the expected time unit of time index column`, `the expr's schema for evaluating the time window`)
/// Return (the column name of time index column, the time window expr, the expected time unit of time index column, the expr's schema for evaluating the time window)
///
/// The time window expr is expected to take one input column of Timestamp type and to also return a Timestamp type; it is expected
/// to be monotonically increasing and to appear in the innermost GROUP BY clause
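An example of such a time window expression, taken from the tests elsewhere in this diff: `date_bin` truncates the time index into fixed 5-second buckets and is monotonic in `ts`.
fn example_time_window_sql() -> &'static str {
    // one Timestamp input column (`ts`), monotonic, and used in the innermost GROUP BY
    "SELECT date_bin(INTERVAL '5 second', ts) AS time_window FROM numbers_with_ts GROUP BY time_window;"
}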

View File

@@ -14,63 +14,29 @@
//! some utils for helping with batching mode
use std::collections::{BTreeSet, HashSet};
use std::collections::HashSet;
use std::sync::Arc;
use catalog::CatalogManagerRef;
use common_error::ext::BoxedError;
use common_telemetry::debug;
use common_telemetry::{debug, info};
use datafusion::error::Result as DfResult;
use datafusion::logical_expr::Expr;
use datafusion::sql::unparser::Unparser;
use datafusion_common::tree_node::{
Transformed, TreeNodeRecursion, TreeNodeRewriter, TreeNodeVisitor,
};
use datafusion_common::{DFSchema, DataFusionError, ScalarValue};
use datafusion_expr::{Distinct, LogicalPlan, Projection};
use datatypes::schema::SchemaRef;
use datafusion_common::DataFusionError;
use datafusion_expr::{Distinct, LogicalPlan};
use datatypes::schema::RawSchema;
use query::parser::QueryLanguageParser;
use query::QueryEngineRef;
use session::context::QueryContextRef;
use snafu::{OptionExt, ResultExt};
use table::metadata::TableInfo;
use snafu::ResultExt;
use crate::adapter::AUTO_CREATED_PLACEHOLDER_TS_COL;
use crate::df_optimizer::apply_df_optimizer;
use crate::error::{DatafusionSnafu, ExternalSnafu, TableNotFoundSnafu};
use crate::{Error, TableName};
pub async fn get_table_info_df_schema(
catalog_mr: CatalogManagerRef,
table_name: TableName,
) -> Result<(Arc<TableInfo>, Arc<DFSchema>), Error> {
let full_table_name = table_name.clone().join(".");
let table = catalog_mr
.table(&table_name[0], &table_name[1], &table_name[2], None)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
.with_context(|| TableNotFoundSnafu {
name: full_table_name.clone(),
})?;
let table_info = table.table_info().clone();
let schema = table_info.meta.schema.clone();
let df_schema: Arc<DFSchema> = Arc::new(
schema
.arrow_schema()
.clone()
.try_into()
.with_context(|_| DatafusionSnafu {
context: format!(
"Failed to convert arrow schema to datafusion schema, arrow_schema={:?}",
schema.arrow_schema()
),
})?,
);
Ok((table_info, df_schema))
}
use crate::error::{DatafusionSnafu, ExternalSnafu};
use crate::Error;
/// Convert sql to datafusion logical plan
pub async fn sql_to_df_plan(
@@ -198,16 +164,14 @@ impl TreeNodeVisitor<'_> for FindGroupByFinalName {
/// (which doesn't necessarily need to have that exact name, it just needs to be an extra timestamp column)
/// and `__ts_placeholder` (this column needs to have exactly this name and be a timestamp)
/// with values like `now()` and `0`
///
/// it also gives existing columns aliases to the corresponding columns in the sink table if needed
#[derive(Debug)]
pub struct AddAutoColumnRewriter {
pub schema: SchemaRef,
pub schema: RawSchema,
pub is_rewritten: bool,
}
impl AddAutoColumnRewriter {
pub fn new(schema: SchemaRef) -> Self {
pub fn new(schema: RawSchema) -> Self {
Self {
schema,
is_rewritten: false,
@@ -217,97 +181,37 @@ impl AddAutoColumnRewriter {
impl TreeNodeRewriter for AddAutoColumnRewriter {
type Node = LogicalPlan;
fn f_down(&mut self, mut node: Self::Node) -> DfResult<Transformed<Self::Node>> {
fn f_down(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
if self.is_rewritten {
return Ok(Transformed::no(node));
}
// if is distinct all, wrap it in a projection
if let LogicalPlan::Distinct(Distinct::All(_)) = &node {
let mut exprs = vec![];
for field in node.schema().fields().iter() {
exprs.push(Expr::Column(datafusion::common::Column::new_unqualified(
field.name(),
)));
}
let projection =
LogicalPlan::Projection(Projection::try_new(exprs, Arc::new(node.clone()))?);
node = projection;
}
// handle table_scan by wrap it in a projection
else if let LogicalPlan::TableScan(table_scan) = node {
let mut exprs = vec![];
for field in table_scan.projected_schema.fields().iter() {
exprs.push(Expr::Column(datafusion::common::Column::new(
Some(table_scan.table_name.clone()),
field.name(),
)));
}
let projection = LogicalPlan::Projection(Projection::try_new(
exprs,
Arc::new(LogicalPlan::TableScan(table_scan)),
)?);
node = projection;
}
// only do rewrite if found the outermost projection
let mut exprs = if let LogicalPlan::Projection(project) = &node {
project.expr.clone()
} else {
// if is distinct all, go one level down
if let LogicalPlan::Distinct(Distinct::All(_)) = node {
return Ok(Transformed::no(node));
};
let all_names = self
.schema
.column_schemas()
.iter()
.map(|c| c.name.clone())
.collect::<BTreeSet<_>>();
// first match by position
for (idx, expr) in exprs.iter_mut().enumerate() {
if !all_names.contains(&expr.qualified_name().1) {
if let Some(col_name) = self
.schema
.column_schemas()
.get(idx)
.map(|c| c.name.clone())
{
// if the data type is mismatched, a later check_execute will error out,
// hence no need to check it here; besides, the optimizer pass might be able to cast it,
// so checking here is not necessary
*expr = expr.clone().alias(col_name);
}
}
}
// FIXME(discord9): just read plan.expr and do stuffs
let mut exprs = node.expressions();
// add columns if have different column count
let query_col_cnt = exprs.len();
let table_col_cnt = self.schema.column_schemas().len();
debug!("query_col_cnt={query_col_cnt}, table_col_cnt={table_col_cnt}");
let placeholder_ts_expr =
datafusion::logical_expr::lit(ScalarValue::TimestampMillisecond(Some(0), None))
.alias(AUTO_CREATED_PLACEHOLDER_TS_COL);
let table_col_cnt = self.schema.column_schemas.len();
info!("query_col_cnt={query_col_cnt}, table_col_cnt={table_col_cnt}");
if query_col_cnt == table_col_cnt {
// still need to add alias, see below
self.is_rewritten = true;
return Ok(Transformed::no(node));
} else if query_col_cnt + 1 == table_col_cnt {
let last_col_schema = self.schema.column_schemas().last().unwrap();
let last_col_schema = self.schema.column_schemas.last().unwrap();
// if time index column is auto created add it
if last_col_schema.name == AUTO_CREATED_PLACEHOLDER_TS_COL
&& self.schema.timestamp_index() == Some(table_col_cnt - 1)
&& self.schema.timestamp_index == Some(table_col_cnt - 1)
{
exprs.push(placeholder_ts_expr);
exprs.push(datafusion::logical_expr::lit(0));
} else if last_col_schema.data_type.is_timestamp() {
// is the update at column
exprs.push(datafusion::prelude::now().alias(&last_col_schema.name));
exprs.push(datafusion::prelude::now());
} else {
// helpful error message
return Err(DataFusionError::Plan(format!(
@@ -317,11 +221,11 @@ impl TreeNodeRewriter for AddAutoColumnRewriter {
)));
}
} else if query_col_cnt + 2 == table_col_cnt {
let mut col_iter = self.schema.column_schemas().iter().rev();
let mut col_iter = self.schema.column_schemas.iter().rev();
let last_col_schema = col_iter.next().unwrap();
let second_last_col_schema = col_iter.next().unwrap();
if second_last_col_schema.data_type.is_timestamp() {
exprs.push(datafusion::prelude::now().alias(&second_last_col_schema.name));
exprs.push(datafusion::prelude::now());
} else {
return Err(DataFusionError::Plan(format!(
"Expect the second last column in the table to be timestamp column, found column {} with type {:?}",
@@ -331,9 +235,9 @@ impl TreeNodeRewriter for AddAutoColumnRewriter {
}
if last_col_schema.name == AUTO_CREATED_PLACEHOLDER_TS_COL
&& self.schema.timestamp_index() == Some(table_col_cnt - 1)
&& self.schema.timestamp_index == Some(table_col_cnt - 1)
{
exprs.push(placeholder_ts_expr);
exprs.push(datafusion::logical_expr::lit(0));
} else {
return Err(DataFusionError::Plan(format!(
"Expect timestamp column {}, found {:?}",
@@ -342,8 +246,8 @@ impl TreeNodeRewriter for AddAutoColumnRewriter {
}
} else {
return Err(DataFusionError::Plan(format!(
"Expect table have 0,1 or 2 columns more than query columns, found {} query columns {:?}, {} table columns {:?} at node {:?}",
query_col_cnt, exprs, table_col_cnt, self.schema.column_schemas(), node
"Expect table have 0,1 or 2 columns more than query columns, found {} query columns {:?}, {} table columns {:?}",
query_col_cnt, node.expressions(), table_col_cnt, self.schema.column_schemas
)));
}
@@ -351,11 +255,6 @@ impl TreeNodeRewriter for AddAutoColumnRewriter {
let new_plan = node.with_new_exprs(exprs, node.inputs().into_iter().cloned().collect())?;
Ok(Transformed::yes(new_plan))
}
/// We might add new columns, so we need to recompute the schema
fn f_up(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
node.recompute_schema().map(Transformed::yes)
}
}
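For readers tracking the branching above, the column-count rule the rewriter enforces can be restated as a small stand-alone sketch (the enum and function names here are invented for illustration; the real implementation works on DataFusion expressions, not plain counts):

// The table may have at most two more columns than the query; the extras are an
// update-at timestamp and/or an auto-created placeholder time index.
#[derive(Debug, PartialEq)]
enum AutoColumn {
    UpdateAtNow,   // filled with `now()`
    PlaceholderTs, // filled with a constant placeholder timestamp
}

fn auto_columns(query_col_cnt: usize, table_col_cnt: usize) -> Result<Vec<AutoColumn>, String> {
    match table_col_cnt.checked_sub(query_col_cnt) {
        Some(0) => Ok(vec![]),
        // which extra column is appended depends on the last table column, as above
        Some(1) => Ok(vec![AutoColumn::UpdateAtNow]),
        Some(2) => Ok(vec![AutoColumn::UpdateAtNow, AutoColumn::PlaceholderTs]),
        _ => Err(format!(
            "expect the table to have 0, 1 or 2 more columns than the query, got {query_col_cnt} query vs {table_col_cnt} table columns"
        )),
    }
}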
// TODO(discord9): a method to find out the precise time window
@@ -402,11 +301,9 @@ impl TreeNodeRewriter for AddFilterRewriter {
#[cfg(test)]
mod test {
use std::sync::Arc;
use datafusion_common::tree_node::TreeNode as _;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::schema::ColumnSchema;
use pretty_assertions::assert_eq;
use session::context::QueryContext;
@@ -489,7 +386,7 @@ mod test {
// add update_at
(
"SELECT number FROM numbers_with_ts",
Ok("SELECT numbers_with_ts.number, now() AS ts FROM numbers_with_ts"),
Ok("SELECT numbers_with_ts.number, now() FROM numbers_with_ts"),
vec![
ColumnSchema::new("number", ConcreteDataType::int32_datatype(), true),
ColumnSchema::new(
@@ -503,7 +400,7 @@ mod test {
// add ts placeholder
(
"SELECT number FROM numbers_with_ts",
Ok("SELECT numbers_with_ts.number, CAST('1970-01-01 00:00:00' AS TIMESTAMP) AS __ts_placeholder FROM numbers_with_ts"),
Ok("SELECT numbers_with_ts.number, 0 FROM numbers_with_ts"),
vec![
ColumnSchema::new("number", ConcreteDataType::int32_datatype(), true),
ColumnSchema::new(
@@ -531,7 +428,7 @@ mod test {
// add update_at and ts placeholder
(
"SELECT number FROM numbers_with_ts",
Ok("SELECT numbers_with_ts.number, now() AS update_at, CAST('1970-01-01 00:00:00' AS TIMESTAMP) AS __ts_placeholder FROM numbers_with_ts"),
Ok("SELECT numbers_with_ts.number, now(), 0 FROM numbers_with_ts"),
vec![
ColumnSchema::new("number", ConcreteDataType::int32_datatype(), true),
ColumnSchema::new(
@@ -550,7 +447,7 @@ mod test {
// add ts placeholder
(
"SELECT number, ts FROM numbers_with_ts",
Ok("SELECT numbers_with_ts.number, numbers_with_ts.ts AS update_at, CAST('1970-01-01 00:00:00' AS TIMESTAMP) AS __ts_placeholder FROM numbers_with_ts"),
Ok("SELECT numbers_with_ts.number, numbers_with_ts.ts, 0 FROM numbers_with_ts"),
vec![
ColumnSchema::new("number", ConcreteDataType::int32_datatype(), true),
ColumnSchema::new(
@@ -569,7 +466,7 @@ mod test {
// add update_at after time index column
(
"SELECT number, ts FROM numbers_with_ts",
Ok("SELECT numbers_with_ts.number, numbers_with_ts.ts, now() AS update_atat FROM numbers_with_ts"),
Ok("SELECT numbers_with_ts.number, numbers_with_ts.ts, now() FROM numbers_with_ts"),
vec![
ColumnSchema::new("number", ConcreteDataType::int32_datatype(), true),
ColumnSchema::new(
@@ -631,8 +528,8 @@ mod test {
let query_engine = create_test_query_engine();
let ctx = QueryContext::arc();
for (before, after, column_schemas) in testcases {
let schema = Arc::new(Schema::new(column_schemas));
let mut add_auto_column_rewriter = AddAutoColumnRewriter::new(schema);
let raw_schema = RawSchema::new(column_schemas);
let mut add_auto_column_rewriter = AddAutoColumnRewriter::new(raw_schema);
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), before, false)
.await

View File

@@ -49,8 +49,6 @@ pub trait FlowEngine {
async fn flush_flow(&self, flow_id: FlowId) -> Result<usize, Error>;
/// Check if the flow exists
async fn flow_exist(&self, flow_id: FlowId) -> Result<bool, Error>;
/// List all flows
async fn list_flows(&self) -> Result<impl IntoIterator<Item = FlowId>, Error>;
/// Handle the insert requests for the flow
async fn handle_flow_inserts(
&self,

View File

@@ -149,13 +149,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Unsupported: {reason}"))]
Unsupported {
reason: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Unsupported temporal filter: {reason}"))]
UnsupportedTemporalFilter {
reason: String,
@@ -196,25 +189,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Illegal check task state: {reason}"))]
IllegalCheckTaskState {
reason: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display(
"Failed to sync with check task for flow {} with allow_drop={}",
flow_id,
allow_drop
))]
SyncCheckTask {
flow_id: FlowId,
allow_drop: bool,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to start server"))]
StartServer {
#[snafu(implicit)]
@@ -306,12 +280,10 @@ impl ErrorExt for Error {
Self::CreateFlow { .. } | Self::Arrow { .. } | Self::Time { .. } => {
StatusCode::EngineExecuteQuery
}
Self::Unexpected { .. }
| Self::SyncCheckTask { .. }
| Self::IllegalCheckTaskState { .. } => StatusCode::Unexpected,
Self::NotImplemented { .. }
| Self::UnsupportedTemporalFilter { .. }
| Self::Unsupported { .. } => StatusCode::Unsupported,
Self::Unexpected { .. } => StatusCode::Unexpected,
Self::NotImplemented { .. } | Self::UnsupportedTemporalFilter { .. } => {
StatusCode::Unsupported
}
Self::External { source, .. } => source.status_code(),
Self::Internal { .. } | Self::CacheRequired { .. } => StatusCode::Internal,
Self::StartServer { source, .. } | Self::ShutdownServer { source, .. } => {

View File

@@ -43,8 +43,8 @@ mod utils;
#[cfg(test)]
mod test_utils;
pub use adapter::{FlowConfig, FlowStreamingEngine, FlowWorkerManagerRef, FlownodeOptions};
pub use batching_mode::frontend_client::{FrontendClient, GrpcQueryHandlerWithBoxedError};
pub use adapter::{FlowConfig, FlowWorkerManager, FlowWorkerManagerRef, FlownodeOptions};
pub use batching_mode::frontend_client::FrontendClient;
pub(crate) use engine::{CreateFlowArgs, FlowId, TableName};
pub use error::{Error, Result};
pub use server::{

View File

@@ -29,7 +29,6 @@ use common_meta::key::TableMetadataManagerRef;
use common_meta::kv_backend::KvBackendRef;
use common_meta::node_manager::{Flownode, NodeManagerRef};
use common_query::Output;
use common_runtime::JoinHandle;
use common_telemetry::tracing::info;
use futures::{FutureExt, TryStreamExt};
use greptime_proto::v1::flow::{flow_server, FlowRequest, FlowResponse, InsertRequests};
@@ -51,10 +50,7 @@ use tonic::codec::CompressionEncoding;
use tonic::transport::server::TcpIncoming;
use tonic::{Request, Response, Status};
use crate::adapter::flownode_impl::{FlowDualEngine, FlowDualEngineRef};
use crate::adapter::{create_worker, FlowWorkerManagerRef};
use crate::batching_mode::engine::BatchingEngine;
use crate::engine::FlowEngine;
use crate::error::{
to_status_with_last_err, CacheRequiredSnafu, CreateFlowSnafu, ExternalSnafu, FlowNotFoundSnafu,
ListFlowsSnafu, ParseAddrSnafu, ShutdownServerSnafu, StartServerSnafu, UnexpectedSnafu,
@@ -63,21 +59,19 @@ use crate::heartbeat::HeartbeatTask;
use crate::metrics::{METRIC_FLOW_PROCESSING_TIME, METRIC_FLOW_ROWS};
use crate::transform::register_function_to_query_engine;
use crate::utils::{SizeReportSender, StateReportHandler};
use crate::{CreateFlowArgs, Error, FlowStreamingEngine, FlownodeOptions, FrontendClient};
use crate::{CreateFlowArgs, Error, FlowWorkerManager, FlownodeOptions, FrontendClient};
pub const FLOW_NODE_SERVER_NAME: &str = "FLOW_NODE_SERVER";
/// wrapping flow node manager to avoid orphan rule with Arc<...>
#[derive(Clone)]
pub struct FlowService {
/// TODO(discord9): replace with dual engine
pub dual_engine: FlowDualEngineRef,
pub manager: FlowWorkerManagerRef,
}
impl FlowService {
pub fn new(manager: FlowDualEngineRef) -> Self {
Self {
dual_engine: manager,
}
pub fn new(manager: FlowWorkerManagerRef) -> Self {
Self { manager }
}
}
@@ -92,7 +86,7 @@ impl flow_server::Flow for FlowService {
.start_timer();
let request = request.into_inner();
self.dual_engine
self.manager
.handle(request)
.await
.map_err(|err| {
@@ -132,7 +126,7 @@ impl flow_server::Flow for FlowService {
.with_label_values(&["in"])
.inc_by(row_count as u64);
self.dual_engine
self.manager
.handle_inserts(request)
.await
.map(Response::new)
@@ -145,16 +139,11 @@ pub struct FlownodeServer {
inner: Arc<FlownodeServerInner>,
}
/// FlownodeServerInner is the inner state of FlownodeServer;
/// this struct is mostly useful for constructing, starting, and stopping the
/// flow node server.
struct FlownodeServerInner {
/// worker shutdown signal, not to be confused with server_shutdown_tx
worker_shutdown_tx: Mutex<broadcast::Sender<()>>,
/// server shutdown signal for shutting down the grpc server
server_shutdown_tx: Mutex<broadcast::Sender<()>>,
/// streaming task handler
streaming_task_handler: Mutex<Option<JoinHandle<()>>>,
flow_service: FlowService,
}
@@ -167,28 +156,16 @@ impl FlownodeServer {
flow_service,
worker_shutdown_tx: Mutex::new(tx),
server_shutdown_tx: Mutex::new(server_tx),
streaming_task_handler: Mutex::new(None),
}),
}
}
/// Start the background task for streaming computation.
async fn start_workers(&self) -> Result<(), Error> {
let manager_ref = self.inner.flow_service.dual_engine.clone();
let handle = manager_ref
.streaming_engine()
let manager_ref = self.inner.flow_service.manager.clone();
let _handle = manager_ref
.clone()
.run_background(Some(self.inner.worker_shutdown_tx.lock().await.subscribe()));
self.inner
.streaming_task_handler
.lock()
.await
.replace(handle);
self.inner
.flow_service
.dual_engine
.start_flow_consistent_check_task()
.await?;
Ok(())
}
@@ -199,11 +176,6 @@ impl FlownodeServer {
if tx.send(()).is_err() {
info!("Receiver dropped, the flow node server has already shutdown");
}
self.inner
.flow_service
.dual_engine
.stop_flow_consistent_check_task()
.await?;
Ok(())
}
}
@@ -300,8 +272,8 @@ impl FlownodeInstance {
&self.flownode_server
}
pub fn flow_engine(&self) -> FlowDualEngineRef {
self.flownode_server.inner.flow_service.dual_engine.clone()
pub fn flow_worker_manager(&self) -> FlowWorkerManagerRef {
self.flownode_server.inner.flow_service.manager.clone()
}
pub fn setup_services(&mut self, services: ServerHandlers) {
@@ -362,6 +334,7 @@ impl FlownodeBuilder {
None,
None,
None,
None,
false,
Default::default(),
self.opts.query.clone(),
@@ -370,21 +343,12 @@ impl FlownodeBuilder {
self.build_manager(query_engine_factory.query_engine())
.await?,
);
let batching = Arc::new(BatchingEngine::new(
self.frontend_client.clone(),
query_engine_factory.query_engine(),
self.flow_metadata_manager.clone(),
self.table_meta.clone(),
self.catalog_manager.clone(),
));
let dual = FlowDualEngine::new(
manager.clone(),
batching,
self.flow_metadata_manager.clone(),
self.catalog_manager.clone(),
);
let server = FlownodeServer::new(FlowService::new(Arc::new(dual)));
if let Err(err) = self.recover_flows(&manager).await {
common_telemetry::error!(err; "Failed to recover flows");
}
let server = FlownodeServer::new(FlowService::new(manager.clone()));
let heartbeat_task = self.heartbeat_task;
@@ -401,7 +365,7 @@ impl FlownodeBuilder {
/// or recover all existing flow tasks if in standalone mode (nodeid is None)
///
/// TODO(discord9): persistent flow tasks with internal state
async fn recover_flows(&self, manager: &FlowDualEngine) -> Result<usize, Error> {
async fn recover_flows(&self, manager: &FlowWorkerManagerRef) -> Result<usize, Error> {
let nodeid = self.opts.node_id;
let to_be_recovered: Vec<_> = if let Some(nodeid) = nodeid {
let to_be_recover = self
@@ -473,7 +437,7 @@ impl FlownodeBuilder {
),
};
manager
.create_flow(args)
.create_flow_inner(args)
.await
.map_err(BoxedError::new)
.with_context(|_| CreateFlowSnafu {
@@ -489,7 +453,7 @@ impl FlownodeBuilder {
async fn build_manager(
&mut self,
query_engine: Arc<dyn QueryEngine>,
) -> Result<FlowStreamingEngine, Error> {
) -> Result<FlowWorkerManager, Error> {
let table_meta = self.table_meta.clone();
register_function_to_query_engine(&query_engine);
@@ -498,7 +462,7 @@ impl FlownodeBuilder {
let node_id = self.opts.node_id.map(|id| id as u32);
let mut man = FlowStreamingEngine::new(node_id, query_engine, table_meta);
let mut man = FlowWorkerManager::new(node_id, query_engine, table_meta);
for worker_id in 0..num_workers {
let (tx, rx) = oneshot::channel();
@@ -580,10 +544,6 @@ impl<'a> FlownodeServiceBuilder<'a> {
}
}
/// Basically a tiny frontend that communicates with the datanode directly, unlike [`FrontendClient`],
/// which connects to a real frontend. This is used by the flow's streaming engine for simple queries.
///
/// For heavy queries use [`FrontendClient`], which offloads computation to the frontend, lifting the load from the flownode
#[derive(Clone)]
pub struct FrontendInvoker {
inserter: Arc<Inserter>,

View File

@@ -153,6 +153,7 @@ pub fn create_test_query_engine() -> Arc<dyn QueryEngine> {
None,
None,
None,
None,
false,
QueryOptions::default(),
);

View File

@@ -270,6 +270,7 @@ mod test {
None,
None,
None,
None,
false,
QueryOptions::default(),
);

View File

@@ -15,7 +15,6 @@ api.workspace = true
arc-swap = "1.0"
async-trait.workspace = true
auth.workspace = true
bytes.workspace = true
cache.workspace = true
catalog.workspace = true
client.workspace = true
@@ -40,7 +39,6 @@ datafusion.workspace = true
datafusion-expr.workspace = true
datanode.workspace = true
datatypes.workspace = true
futures.workspace = true
humantime-serde.workspace = true
lazy_static.workspace = true
log-query.workspace = true
@@ -49,7 +47,6 @@ meta-client.workspace = true
num_cpus.workspace = true
opentelemetry-proto.workspace = true
operator.workspace = true
otel-arrow-rust.workspace = true
partition.workspace = true
pipeline.workspace = true
prometheus.workspace = true

View File

@@ -19,8 +19,6 @@ use common_error::define_into_tonic_status;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use common_query::error::datafusion_status_code;
use datafusion::error::DataFusionError;
use session::ReadPreference;
use snafu::{Location, Snafu};
use store_api::storage::RegionId;
@@ -347,15 +345,7 @@ pub enum Error {
SubstraitDecodeLogicalPlan {
#[snafu(implicit)]
location: Location,
source: common_query::error::Error,
},
#[snafu(display("DataFusionError"))]
DataFusion {
#[snafu(source)]
error: DataFusionError,
#[snafu(implicit)]
location: Location,
source: substrait::error::Error,
},
}
@@ -433,8 +423,6 @@ impl ErrorExt for Error {
Error::TableOperation { source, .. } => source.status_code(),
Error::InFlightWriteBytesExceeded { .. } => StatusCode::RateLimited,
Error::DataFusion { error, .. } => datafusion_status_code::<Self>(error, None),
}
}

View File

@@ -278,7 +278,7 @@ impl SqlQueryHandler for Instance {
// plan should be prepared before exec
// we'll do check there
self.query_engine
.execute(plan.clone(), query_ctx)
.execute(plan, query_ctx)
.await
.context(ExecLogicalPlanSnafu)
}

View File

@@ -24,9 +24,11 @@ use common_meta::key::flow::FlowMetadataManager;
use common_meta::key::TableMetadataManager;
use common_meta::kv_backend::KvBackendRef;
use common_meta::node_manager::NodeManagerRef;
use common_meta::snapshot::MetadataSnapshotManager;
use operator::delete::Deleter;
use operator::flow::FlowServiceOperator;
use operator::insert::Inserter;
use operator::metadata::MetadataSnapshotOperator;
use operator::procedure::ProcedureServiceOperator;
use operator::request::Requester;
use operator::statement::{StatementExecutor, StatementExecutorRef};
@@ -55,6 +57,7 @@ pub struct FrontendBuilder {
plugins: Option<Plugins>,
procedure_executor: ProcedureExecutorRef,
stats: StatementStatistics,
metadata_snapshot_manager: Option<MetadataSnapshotManager>,
}
impl FrontendBuilder {
@@ -77,6 +80,17 @@ impl FrontendBuilder {
plugins: None,
procedure_executor,
stats,
metadata_snapshot_manager: None,
}
}
pub fn with_metadata_snapshot_manager(
self,
metadata_snapshot_manager: MetadataSnapshotManager,
) -> Self {
Self {
metadata_snapshot_manager: Some(metadata_snapshot_manager),
..self
}
}
@@ -158,12 +172,17 @@ impl FrontendBuilder {
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
let flow_service = FlowServiceOperator::new(flow_metadata_manager, node_manager.clone());
let metadata_snapshot_operator = self
.metadata_snapshot_manager
.map(|manager| Arc::new(MetadataSnapshotOperator::new(manager)) as _);
let query_engine = QueryEngineFactory::new_with_plugins(
self.catalog_manager.clone(),
Some(region_query_handler.clone()),
Some(table_mutation_handler),
Some(procedure_service_handler),
Some(Arc::new(flow_service)),
metadata_snapshot_operator,
true,
plugins.clone(),
self.options.query.clone(),

View File

@@ -12,33 +12,29 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use api::v1::ddl_request::{Expr as DdlExpr, Expr};
use api::v1::greptime_request::Request;
use api::v1::query_request::Query;
use api::v1::{
DeleteRequests, DropFlowExpr, InsertIntoPlan, InsertRequests, RowDeleteRequests,
RowInsertRequests,
};
use api::v1::{DeleteRequests, DropFlowExpr, InsertRequests, RowDeleteRequests, RowInsertRequests};
use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
use common_base::AffectedRows;
use common_query::logical_plan::add_insert_to_logical_plan;
use common_query::Output;
use common_telemetry::tracing::{self};
use datafusion::execution::SessionStateBuilder;
use query::parser::PromQuery;
use servers::interceptor::{GrpcQueryInterceptor, GrpcQueryInterceptorRef};
use servers::query_handler::grpc::{GrpcQueryHandler, RawRecordBatch};
use servers::query_handler::sql::SqlQueryHandler;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use table::table_name::TableName;
use crate::error::{
CatalogSnafu, DataFusionSnafu, Error, InFlightWriteBytesExceededSnafu,
IncompleteGrpcRequestSnafu, NotSupportedSnafu, PermissionSnafu, PlanStatementSnafu, Result,
SubstraitDecodeLogicalPlanSnafu, TableNotFoundSnafu, TableOperationSnafu,
CatalogSnafu, Error, InFlightWriteBytesExceededSnafu, IncompleteGrpcRequestSnafu,
NotSupportedSnafu, PermissionSnafu, Result, SubstraitDecodeLogicalPlanSnafu,
TableNotFoundSnafu, TableOperationSnafu,
};
use crate::instance::{attach_timer, Instance};
use crate::metrics::{
@@ -95,31 +91,14 @@ impl GrpcQueryHandler for Instance {
Query::LogicalPlan(plan) => {
// this path is useful internally when flownode needs to execute a logical plan through gRPC interface
let timer = GRPC_HANDLE_PLAN_ELAPSED.start_timer();
// use dummy catalog to provide table
let plan_decoder = self
.query_engine()
.engine_context(ctx.clone())
.new_plan_decoder()
.context(PlanStatementSnafu)?;
let dummy_catalog_list =
Arc::new(catalog::table_source::dummy_catalog::DummyCatalogList::new(
self.catalog_manager().clone(),
));
let logical_plan = plan_decoder
.decode(bytes::Bytes::from(plan), dummy_catalog_list, true)
let plan = DFLogicalSubstraitConvertor {}
.decode(&*plan, SessionStateBuilder::default().build())
.await
.context(SubstraitDecodeLogicalPlanSnafu)?;
let output =
SqlQueryHandler::do_exec_plan(self, logical_plan, ctx.clone()).await?;
let output = SqlQueryHandler::do_exec_plan(self, plan, ctx.clone()).await?;
attach_timer(output, timer)
}
Query::InsertIntoPlan(insert) => {
self.handle_insert_plan(insert, ctx.clone()).await?
}
Query::PromRangeQuery(promql) => {
let timer = GRPC_HANDLE_PROMQL_ELAPSED.start_timer();
let prom_query = PromQuery {
@@ -305,91 +284,6 @@ fn fill_catalog_and_schema_from_context(ddl_expr: &mut DdlExpr, ctx: &QueryConte
}
impl Instance {
async fn handle_insert_plan(
&self,
insert: InsertIntoPlan,
ctx: QueryContextRef,
) -> Result<Output> {
let timer = GRPC_HANDLE_PLAN_ELAPSED.start_timer();
let table_name = insert.table_name.context(IncompleteGrpcRequestSnafu {
err_msg: "'table_name' is absent in InsertIntoPlan",
})?;
// use dummy catalog to provide table
let plan_decoder = self
.query_engine()
.engine_context(ctx.clone())
.new_plan_decoder()
.context(PlanStatementSnafu)?;
let dummy_catalog_list =
Arc::new(catalog::table_source::dummy_catalog::DummyCatalogList::new(
self.catalog_manager().clone(),
));
// no optimize yet since we still need to add stuff
let logical_plan = plan_decoder
.decode(
bytes::Bytes::from(insert.logical_plan),
dummy_catalog_list,
false,
)
.await
.context(SubstraitDecodeLogicalPlanSnafu)?;
let table = self
.catalog_manager()
.table(
&table_name.catalog_name,
&table_name.schema_name,
&table_name.table_name,
None,
)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: [
table_name.catalog_name.clone(),
table_name.schema_name.clone(),
table_name.table_name.clone(),
]
.join("."),
})?;
let table_info = table.table_info();
let df_schema = Arc::new(
table_info
.meta
.schema
.arrow_schema()
.clone()
.try_into()
.context(DataFusionSnafu)?,
);
let insert_into = add_insert_to_logical_plan(table_name, df_schema, logical_plan)
.context(SubstraitDecodeLogicalPlanSnafu)?;
let engine_ctx = self.query_engine().engine_context(ctx.clone());
let state = engine_ctx.state();
// Analyze the plan
let analyzed_plan = state
.analyzer()
.execute_and_check(insert_into, state.config_options(), |_, _| {})
.context(common_query::error::GeneralDataFusionSnafu)
.context(SubstraitDecodeLogicalPlanSnafu)?;
// Optimize the plan
let optimized_plan = state
.optimize(&analyzed_plan)
.context(common_query::error::GeneralDataFusionSnafu)
.context(SubstraitDecodeLogicalPlanSnafu)?;
let output = SqlQueryHandler::do_exec_plan(self, optimized_plan, ctx.clone()).await?;
Ok(attach_timer(output, timer))
}
#[tracing::instrument(skip_all)]
pub async fn handle_inserts(
&self,

View File

@@ -27,7 +27,6 @@ use servers::http::{HttpServer, HttpServerBuilder};
use servers::interceptor::LogIngestInterceptorRef;
use servers::metrics_handler::MetricsHandler;
use servers::mysql::server::{MysqlServer, MysqlSpawnConfig, MysqlSpawnRef};
use servers::otel_arrow::OtelArrowServiceHandler;
use servers::postgres::PostgresServer;
use servers::query_handler::grpc::ServerGrpcQueryHandlerAdapter;
use servers::query_handler::sql::ServerSqlQueryHandlerAdapter;
@@ -163,7 +162,6 @@ where
let grpc_server = builder
.database_handler(greptime_request_handler.clone())
.prometheus_handler(self.instance.clone(), user_provider.clone())
.otel_arrow_handler(OtelArrowServiceHandler(self.instance.clone()))
.flight_handler(Arc::new(greptime_request_handler))
.build();
Ok(grpc_server)

View File

@@ -294,20 +294,10 @@ pub async fn metasrv_builder(
let in_memory = Arc::new(MemoryKvBackend::new()) as ResettableKvBackendRef;
let selector = if let Some(selector) = plugins.get::<SelectorRef>() {
info!("Using selector from plugins");
selector
} else {
let selector = match opts.selector {
SelectorType::LoadBased => Arc::new(LoadBasedSelector::default()) as SelectorRef,
SelectorType::LeaseBased => Arc::new(LeaseBasedSelector) as SelectorRef,
SelectorType::RoundRobin => Arc::new(RoundRobinSelector::default()) as SelectorRef,
};
info!(
"Using selector from options, selector type: {}",
opts.selector.as_ref()
);
selector
let selector = match opts.selector {
SelectorType::LoadBased => Arc::new(LoadBasedSelector::default()) as SelectorRef,
SelectorType::LeaseBased => Arc::new(LeaseBasedSelector) as SelectorRef,
SelectorType::RoundRobin => Arc::new(RoundRobinSelector::default()) as SelectorRef,
};
Ok(MetasrvBuilder::new()

View File

@@ -336,13 +336,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Region's leader peer changed: {}", msg))]
LeaderPeerChanged {
msg: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid arguments: {}", err_msg))]
InvalidArguments {
err_msg: String,
@@ -921,8 +914,7 @@ impl ErrorExt for Error {
| Error::ProcedureNotFound { .. }
| Error::TooManyPartitions { .. }
| Error::TomlFormat { .. }
| Error::HandlerNotFound { .. }
| Error::LeaderPeerChanged { .. } => StatusCode::InvalidArguments,
| Error::HandlerNotFound { .. } => StatusCode::InvalidArguments,
Error::LeaseKeyFromUtf8 { .. }
| Error::LeaseValueFromUtf8 { .. }
| Error::InvalidRegionKeyFromUtf8 { .. }

View File

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use common_error::ext::BoxedError;
use common_meta::ddl::flow_meta::PartitionPeerAllocator;
use common_meta::peer::Peer;
@@ -42,7 +40,6 @@ impl PartitionPeerAllocator for FlowPeerAllocator {
SelectorOptions {
min_required_items: partitions,
allow_duplication: true,
exclude_peer_ids: HashSet::new(),
},
)
.await

View File

@@ -62,9 +62,7 @@ lazy_static! {
register_int_counter!("greptime_meta_region_migration_fail", "meta region migration fail").unwrap();
// The heartbeat stat memory size histogram.
pub static ref METRIC_META_HEARTBEAT_STAT_MEMORY_SIZE: Histogram =
register_histogram!("greptime_meta_heartbeat_stat_memory_size", "meta heartbeat stat memory size", vec![
100.0, 500.0, 1000.0, 1500.0, 2000.0, 3000.0, 5000.0, 10000.0, 20000.0
]).unwrap();
register_histogram!("greptime_meta_heartbeat_stat_memory_size", "meta heartbeat stat memory size").unwrap();
// The heartbeat rate counter.
pub static ref METRIC_META_HEARTBEAT_RATE: IntCounter =
register_int_counter!("greptime_meta_heartbeat_rate", "meta heartbeat arrival rate").unwrap();

View File

@@ -58,9 +58,6 @@ use crate::error::{self, Result};
use crate::metrics::{METRIC_META_REGION_MIGRATION_ERROR, METRIC_META_REGION_MIGRATION_EXECUTE};
use crate::service::mailbox::MailboxRef;
/// The default timeout for region migration.
pub const DEFAULT_REGION_MIGRATION_TIMEOUT: Duration = Duration::from_secs(120);
/// It's shared in each step and available even after recovering.
///
/// It will only be updated/stored after the Red node has succeeded.

View File

@@ -267,8 +267,8 @@ impl RegionMigrationManager {
ensure!(
leader_peer.id == task.from_peer.id,
error::LeaderPeerChangedSnafu {
msg: format!(
error::InvalidArgumentsSnafu {
err_msg: format!(
"Region's leader peer({}) is not the `from_peer`({}), region: {}",
leader_peer.id, task.from_peer.id, task.region_id
),
@@ -507,8 +507,8 @@ mod test {
.await;
let err = manager.submit_procedure(task).await.unwrap_err();
assert_matches!(err, error::Error::LeaderPeerChanged { .. });
assert_eq!(err.to_string(), "Region's leader peer changed: Region's leader peer(3) is not the `from_peer`(1), region: 4398046511105(1024, 1)");
assert_matches!(err, error::Error::InvalidArguments { .. });
assert_eq!(err.to_string(), "Invalid arguments: Region's leader peer(3) is not the `from_peer`(1), region: 4398046511105(1024, 1)");
}
#[tokio::test]

View File

@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::fmt::Debug;
use std::sync::{Arc, Mutex};
use std::time::Duration;
@@ -27,7 +26,7 @@ use common_meta::DatanodeId;
use common_runtime::JoinHandle;
use common_telemetry::{error, info, warn};
use common_time::util::current_time_millis;
use error::Error::{LeaderPeerChanged, MigrationRunning, TableRouteNotFound};
use error::Error::{MigrationRunning, TableRouteNotFound};
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionId;
use tokio::sync::mpsc::{Receiver, Sender};
@@ -37,9 +36,7 @@ use crate::error::{self, Result};
use crate::failure_detector::PhiAccrualFailureDetectorOptions;
use crate::metasrv::{SelectorContext, SelectorRef};
use crate::procedure::region_migration::manager::RegionMigrationManagerRef;
use crate::procedure::region_migration::{
RegionMigrationProcedureTask, DEFAULT_REGION_MIGRATION_TIMEOUT,
};
use crate::procedure::region_migration::RegionMigrationProcedureTask;
use crate::region::failure_detector::RegionFailureDetector;
use crate::selector::SelectorOptions;
@@ -366,15 +363,15 @@ impl RegionSupervisor {
for (datanode_id, region_id) in migrating_regions {
self.failure_detector.remove(&(datanode_id, region_id));
warn!(
"Removed region failover for region: {region_id}, datanode: {datanode_id} because it's migrating"
);
}
warn!("Detects region failures: {:?}", regions);
for (datanode_id, region_id) in regions {
if let Err(err) = self.do_failover(datanode_id, region_id).await {
error!(err; "Failed to execute region failover for region: {region_id}, datanode: {datanode_id}");
match self.do_failover(datanode_id, region_id).await {
Ok(_) => self.failure_detector.remove(&(datanode_id, region_id)),
Err(err) => {
error!(err; "Failed to execute region failover for region: {region_id}, datanode: {datanode_id}");
}
}
}
}
@@ -404,7 +401,6 @@ impl RegionSupervisor {
SelectorOptions {
min_required_items: 1,
allow_duplication: false,
exclude_peer_ids: HashSet::from([from_peer.id]),
},
)
.await?;
@@ -419,35 +415,13 @@ impl RegionSupervisor {
region_id,
from_peer,
to_peer,
timeout: DEFAULT_REGION_MIGRATION_TIMEOUT,
timeout: Duration::from_secs(60),
};
if let Err(err) = self.region_migration_manager.submit_procedure(task).await {
return match err {
// Returns Ok if it's running or table is dropped.
MigrationRunning { .. } => {
info!(
"Another region migration is running, skip failover for region: {}, datanode: {}",
region_id, datanode_id
);
Ok(())
}
TableRouteNotFound { .. } => {
self.failure_detector.remove(&(datanode_id, region_id));
info!(
"Table route is not found, the table is dropped, removed failover detector for region: {}, datanode: {}",
region_id, datanode_id
);
Ok(())
}
LeaderPeerChanged { .. } => {
self.failure_detector.remove(&(datanode_id, region_id));
info!(
"Region's leader peer changed, removed failover detector for region: {}, datanode: {}",
region_id, datanode_id
);
Ok(())
}
MigrationRunning { .. } | TableRouteNotFound { .. } => Ok(()),
err => Err(err),
};
};
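The richer of the two variants in this hunk can be summarized with a stand-in error enum (a sketch only; the real code matches the crate's snafu-generated variants shown above):

// Minimal sketch: which submit errors end the failover quietly, and which ones also
// drop the stale failure-detector entry.
enum SubmitError {
    MigrationRunning,
    TableRouteNotFound,
    LeaderPeerChanged,
    Other(String),
}

fn on_submit_error(err: SubmitError, remove_detector: impl FnOnce()) -> Result<(), String> {
    match err {
        // another migration for the region is already in flight: nothing to do
        SubmitError::MigrationRunning => Ok(()),
        // the table is gone or the leader moved: the detector entry is stale, drop it
        SubmitError::TableRouteNotFound | SubmitError::LeaderPeerChanged => {
            remove_detector();
            Ok(())
        }
        SubmitError::Other(msg) => Err(msg),
    }
}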

View File

@@ -12,18 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod common;
mod common;
pub mod lease_based;
pub mod load_based;
pub mod round_robin;
#[cfg(test)]
pub(crate) mod test_utils;
mod weight_compute;
pub mod weighted_choose;
use std::collections::HashSet;
mod weighted_choose;
use serde::{Deserialize, Serialize};
use strum::AsRefStr;
use crate::error;
use crate::error::Result;
@@ -42,8 +39,6 @@ pub struct SelectorOptions {
pub min_required_items: usize,
/// Whether duplicates are allowed in the selected result, default false.
pub allow_duplication: bool,
/// The peers to exclude from the selection.
pub exclude_peer_ids: HashSet<u64>,
}
impl Default for SelectorOptions {
@@ -51,13 +46,12 @@ impl Default for SelectorOptions {
Self {
min_required_items: 1,
allow_duplication: false,
exclude_peer_ids: HashSet::new(),
}
}
}
/// [`SelectorType`] refers to the load balancer used when creating tables.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default, AsRefStr)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(try_from = "String")]
pub enum SelectorType {
/// The current load balancing is based on the number of regions on each datanode;

View File

@@ -12,25 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use common_meta::peer::Peer;
use snafu::ensure;
use crate::error;
use crate::error::Result;
use crate::metasrv::SelectTarget;
use crate::selector::weighted_choose::{WeightedChoose, WeightedItem};
use crate::selector::weighted_choose::WeightedChoose;
use crate::selector::SelectorOptions;
/// Filter out the excluded peers from the `weight_array`.
pub fn filter_out_excluded_peers(
weight_array: &mut Vec<WeightedItem<Peer>>,
exclude_peer_ids: &HashSet<u64>,
) {
weight_array.retain(|peer| !exclude_peer_ids.contains(&peer.item.id));
}
/// According to the `opts`, choose peers from the `weight_array` through `weighted_choose`.
pub fn choose_items<W>(opts: &SelectorOptions, weighted_choose: &mut W) -> Result<Vec<Peer>>
where
@@ -90,7 +80,7 @@ mod tests {
use common_meta::peer::Peer;
use crate::selector::common::{choose_items, filter_out_excluded_peers};
use crate::selector::common::choose_items;
use crate::selector::weighted_choose::{RandomWeightedChoose, WeightedItem};
use crate::selector::SelectorOptions;
@@ -102,35 +92,35 @@ mod tests {
id: 1,
addr: "127.0.0.1:3001".to_string(),
},
weight: 1.0,
weight: 1,
},
WeightedItem {
item: Peer {
id: 2,
addr: "127.0.0.1:3001".to_string(),
},
weight: 1.0,
weight: 1,
},
WeightedItem {
item: Peer {
id: 3,
addr: "127.0.0.1:3001".to_string(),
},
weight: 1.0,
weight: 1,
},
WeightedItem {
item: Peer {
id: 4,
addr: "127.0.0.1:3001".to_string(),
},
weight: 1.0,
weight: 1,
},
WeightedItem {
item: Peer {
id: 5,
addr: "127.0.0.1:3001".to_string(),
},
weight: 1.0,
weight: 1,
},
];
@@ -138,7 +128,6 @@ mod tests {
let opts = SelectorOptions {
min_required_items: i,
allow_duplication: false,
exclude_peer_ids: HashSet::new(),
};
let selected_peers: HashSet<_> =
@@ -153,7 +142,6 @@ mod tests {
let opts = SelectorOptions {
min_required_items: 6,
allow_duplication: false,
exclude_peer_ids: HashSet::new(),
};
let selected_result =
@@ -164,7 +152,6 @@ mod tests {
let opts = SelectorOptions {
min_required_items: i,
allow_duplication: true,
exclude_peer_ids: HashSet::new(),
};
let selected_peers =
@@ -173,30 +160,4 @@ mod tests {
assert_eq!(i, selected_peers.len());
}
}
#[test]
fn test_filter_out_excluded_peers() {
let mut weight_array = vec![
WeightedItem {
item: Peer {
id: 1,
addr: "127.0.0.1:3001".to_string(),
},
weight: 1.0,
},
WeightedItem {
item: Peer {
id: 2,
addr: "127.0.0.1:3002".to_string(),
},
weight: 1.0,
},
];
let exclude_peer_ids = HashSet::from([1]);
filter_out_excluded_peers(&mut weight_array, &exclude_peer_ids);
assert_eq!(weight_array.len(), 1);
assert_eq!(weight_array[0].item.id, 2);
}
}
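The exclusion filter that appears on one side of these selector hunks is small enough to restate in isolation, using local stand-ins for the crate's `Peer` and `WeightedItem` types:

use std::collections::HashSet;

struct Peer {
    id: u64,
    addr: String,
}

struct WeightedItem<T> {
    item: T,
    weight: f64,
}

// Keep only candidates whose peer id is not in the exclusion set.
fn filter_out_excluded_peers(weight_array: &mut Vec<WeightedItem<Peer>>, exclude_peer_ids: &HashSet<u64>) {
    weight_array.retain(|w| !exclude_peer_ids.contains(&w.item.id));
}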

View File

@@ -17,7 +17,7 @@ use common_meta::peer::Peer;
use crate::error::Result;
use crate::lease;
use crate::metasrv::SelectorContext;
use crate::selector::common::{choose_items, filter_out_excluded_peers};
use crate::selector::common::choose_items;
use crate::selector::weighted_choose::{RandomWeightedChoose, WeightedItem};
use crate::selector::{Selector, SelectorOptions};
@@ -35,19 +35,18 @@ impl Selector for LeaseBasedSelector {
lease::alive_datanodes(&ctx.meta_peer_client, ctx.datanode_lease_secs).await?;
// 2. compute weight array, but the weight of each item is the same.
let mut weight_array = lease_kvs
let weight_array = lease_kvs
.into_iter()
.map(|(k, v)| WeightedItem {
item: Peer {
id: k.node_id,
addr: v.node_addr.clone(),
},
weight: 1.0,
weight: 1,
})
.collect();
// 3. choose peers by weight_array.
filter_out_excluded_peers(&mut weight_array, &opts.exclude_peer_ids);
let mut weighted_choose = RandomWeightedChoose::new(weight_array);
let selected = choose_items(&opts, &mut weighted_choose)?;

View File

@@ -26,7 +26,7 @@ use crate::error::{self, Result};
use crate::key::{DatanodeLeaseKey, LeaseValue};
use crate::lease;
use crate::metasrv::SelectorContext;
use crate::selector::common::{choose_items, filter_out_excluded_peers};
use crate::selector::common::choose_items;
use crate::selector::weight_compute::{RegionNumsBasedWeightCompute, WeightCompute};
use crate::selector::weighted_choose::RandomWeightedChoose;
use crate::selector::{Selector, SelectorOptions};
@@ -85,10 +85,9 @@ where
};
// 4. compute weight array.
let mut weight_array = self.weight_compute.compute(&stat_kvs);
let weight_array = self.weight_compute.compute(&stat_kvs);
// 5. choose peers by weight_array.
filter_out_excluded_peers(&mut weight_array, &opts.exclude_peer_ids);
let mut weighted_choose = RandomWeightedChoose::new(weight_array);
let selected = choose_items(&opts, &mut weighted_choose)?;

View File

@@ -120,8 +120,6 @@ impl Selector for RoundRobinSelector {
#[cfg(test)]
mod test {
use std::collections::HashSet;
use super::*;
use crate::test_util::{create_selector_context, put_datanodes};
@@ -151,7 +149,6 @@ mod test {
SelectorOptions {
min_required_items: 4,
allow_duplication: true,
exclude_peer_ids: HashSet::new(),
},
)
.await
@@ -168,7 +165,6 @@ mod test {
SelectorOptions {
min_required_items: 2,
allow_duplication: true,
exclude_peer_ids: HashSet::new(),
},
)
.await

View File

@@ -84,7 +84,7 @@ impl WeightCompute for RegionNumsBasedWeightCompute {
.zip(region_nums)
.map(|(peer, region_num)| WeightedItem {
item: peer,
weight: (max_weight - region_num + base_weight) as f64,
weight: (max_weight - region_num + base_weight) as usize,
})
.collect()
}
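To make the formula concrete, a tiny worked example (it is assumed here that `max_weight` is the largest region count among the candidates and `base_weight` is a small constant such as 1):

// region_nums = [3, 1, 2], max_weight = 3, base_weight = 1  =>  weights = [1, 3, 2]
// (fewer regions on a datanode means a larger weight, so it attracts new regions)
fn region_weights(region_nums: &[usize], base_weight: usize) -> Vec<usize> {
    let max_weight = region_nums.iter().copied().max().unwrap_or(0);
    region_nums
        .iter()
        .map(|n| max_weight - n + base_weight)
        .collect()
}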
@@ -148,7 +148,7 @@ mod tests {
2,
);
for weight in weight_array.iter() {
assert_eq!(*expected.get(&weight.item).unwrap(), weight.weight as usize);
assert_eq!(*expected.get(&weight.item).unwrap(), weight.weight,);
}
let mut expected = HashMap::new();

View File

@@ -42,10 +42,10 @@ pub trait WeightedChoose<Item>: Send + Sync {
}
/// The struct represents a weighted item.
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WeightedItem<Item> {
pub item: Item,
pub weight: f64,
pub weight: usize,
}
/// An implementation of weighted balancing: random weighted choose.
@@ -87,7 +87,7 @@ where
// unwrap safety: whether weighted_index is none has been checked before.
let item = self
.items
.choose_weighted(&mut rng(), |item| item.weight)
.choose_weighted(&mut rng(), |item| item.weight as f64)
.context(error::ChooseItemsSnafu)?
.item
.clone();
@@ -95,11 +95,11 @@ where
}
fn choose_multiple(&mut self, amount: usize) -> Result<Vec<Item>> {
let amount = amount.min(self.items.iter().filter(|item| item.weight > 0.0).count());
let amount = amount.min(self.items.iter().filter(|item| item.weight > 0).count());
Ok(self
.items
.choose_multiple_weighted(&mut rng(), amount, |item| item.weight)
.choose_multiple_weighted(&mut rng(), amount, |item| item.weight as f64)
.context(error::ChooseItemsSnafu)?
.cloned()
.map(|item| item.item)
@@ -120,12 +120,9 @@ mod tests {
let mut choose = RandomWeightedChoose::new(vec![
WeightedItem {
item: 1,
weight: 100.0,
},
WeightedItem {
item: 2,
weight: 0.0,
weight: 100,
},
WeightedItem { item: 2, weight: 0 },
]);
for _ in 0..100 {

View File

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_meta::ddl::table_meta::PeerAllocator;
@@ -53,7 +51,6 @@ impl MetasrvPeerAllocator {
SelectorOptions {
min_required_items: regions,
allow_duplication: true,
exclude_peer_ids: HashSet::new(),
},
)
.await?;

View File

@@ -36,9 +36,7 @@ use crate::read::Batch;
use crate::row_converter::{CompositeValues, PrimaryKeyCodec};
use crate::sst::file::FileHandle;
use crate::sst::parquet::format::ReadFormat;
use crate::sst::parquet::reader::{
MaybeFilter, RowGroupReader, RowGroupReaderBuilder, SimpleFilterContext,
};
use crate::sst::parquet::reader::{RowGroupReader, RowGroupReaderBuilder, SimpleFilterContext};
/// A range of a parquet SST. Now it is a row group.
/// We can read different file ranges in parallel.
@@ -257,15 +255,8 @@ impl RangeBase {
// Run filter one by one and combine them result
// TODO(ruihang): run primary key filter first. It may short circuit other filters
for filter_ctx in &self.filters {
let filter = match filter_ctx.filter() {
MaybeFilter::Filter(f) => f,
// Column matches.
MaybeFilter::Matched => continue,
// Column doesn't match, filter the entire batch.
MaybeFilter::Pruned => return Ok(None),
};
let result = match filter_ctx.semantic_type() {
for filter in &self.filters {
let result = match filter.semantic_type() {
SemanticType::Tag => {
let pk_values = if let Some(pk_values) = input.pk_values() {
pk_values
@@ -279,20 +270,21 @@ impl RangeBase {
let pk_index = self
.read_format
.metadata()
.primary_key_index(filter_ctx.column_id())
.primary_key_index(filter.column_id())
.unwrap();
v[pk_index]
.1
.try_to_scalar_value(filter_ctx.data_type())
.try_to_scalar_value(filter.data_type())
.context(FieldTypeMismatchSnafu)?
}
CompositeValues::Sparse(v) => {
let v = v.get_or_null(filter_ctx.column_id());
v.try_to_scalar_value(filter_ctx.data_type())
let v = v.get_or_null(filter.column_id());
v.try_to_scalar_value(filter.data_type())
.context(FieldTypeMismatchSnafu)?
}
};
if filter
.filter()
.evaluate_scalar(&pk_value)
.context(FilterRecordBatchSnafu)?
{
@@ -303,17 +295,18 @@ impl RangeBase {
}
}
SemanticType::Field => {
let Some(field_index) =
self.read_format.field_index_by_id(filter_ctx.column_id())
let Some(field_index) = self.read_format.field_index_by_id(filter.column_id())
else {
continue;
};
let field_col = &input.fields()[field_index].data;
filter
.filter()
.evaluate_vector(field_col)
.context(FilterRecordBatchSnafu)?
}
SemanticType::Timestamp => filter
.filter()
.evaluate_vector(input.timestamps())
.context(FilterRecordBatchSnafu)?,
};

View File

@@ -34,7 +34,7 @@ use parquet::arrow::{parquet_to_arrow_field_levels, FieldLevels, ProjectionMask}
use parquet::file::metadata::ParquetMetaData;
use parquet::format::KeyValue;
use snafu::{OptionExt, ResultExt};
use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataRef};
use store_api::metadata::{RegionMetadata, RegionMetadataRef};
use store_api::storage::ColumnId;
use table::predicate::Predicate;
@@ -191,7 +191,6 @@ impl ParquetReaderBuilder {
let file_path = self.file_handle.file_path(&self.file_dir);
let file_size = self.file_handle.meta_ref().file_size;
// Loads parquet metadata of the file.
let parquet_meta = self.read_parquet_metadata(&file_path, file_size).await?;
// Decodes region metadata.
@@ -551,17 +550,11 @@ impl ParquetReaderBuilder {
let row_groups = parquet_meta.row_groups();
let stats =
RowGroupPruningStats::new(row_groups, read_format, self.expected_metadata.clone());
let prune_schema = self
.expected_metadata
.as_ref()
.map(|meta| meta.schema.arrow_schema())
.unwrap_or_else(|| region_meta.schema.arrow_schema());
// Here we use the schema of the SST to build the physical expression. If the column
// in the SST doesn't have the same column id as the column in the expected metadata,
// we will get `None` statistics for that column.
let res = predicate
.prune_with_stats(&stats, prune_schema)
.prune_with_stats(&stats, region_meta.schema.arrow_schema())
.iter()
.zip(0..parquet_meta.num_row_groups())
.filter_map(|(mask, row_group)| {
@@ -1016,20 +1009,10 @@ impl ReaderState {
}
}
/// The filter to evaluate or the prune result of the default value.
pub(crate) enum MaybeFilter {
/// The filter to evaluate.
Filter(SimpleFilterEvaluator),
/// The filter matches the default value.
Matched,
/// The filter is pruned.
Pruned,
}
/// Context to evaluate the column filter for a parquet file.
/// Context to evaluate the column filter.
pub(crate) struct SimpleFilterContext {
/// Filter to evaluate.
filter: MaybeFilter,
filter: SimpleFilterEvaluator,
/// Id of the column to evaluate.
column_id: ColumnId,
/// Semantic type of the column.
@@ -1049,38 +1032,22 @@ impl SimpleFilterContext {
expr: &Expr,
) -> Option<Self> {
let filter = SimpleFilterEvaluator::try_new(expr)?;
let (column_metadata, maybe_filter) = match expected_meta {
let column_metadata = match expected_meta {
Some(meta) => {
// Gets the column metadata from the expected metadata.
let column = meta.column_by_name(filter.column_name())?;
// Checks if the column is present in the SST metadata. We still use the
// column from the expected metadata.
match sst_meta.column_by_id(column.column_id) {
Some(sst_column) => {
debug_assert_eq!(column.semantic_type, sst_column.semantic_type);
let sst_column = sst_meta.column_by_id(column.column_id)?;
debug_assert_eq!(column.semantic_type, sst_column.semantic_type);
(column, MaybeFilter::Filter(filter))
}
None => {
// If the column is not present in the SST metadata, we evaluate the filter
// against the default value of the column.
// If we can't evaluate the filter, we return None.
if pruned_by_default(&filter, column)? {
(column, MaybeFilter::Pruned)
} else {
(column, MaybeFilter::Matched)
}
}
}
}
None => {
let column = sst_meta.column_by_name(filter.column_name())?;
(column, MaybeFilter::Filter(filter))
column
}
None => sst_meta.column_by_name(filter.column_name())?,
};
Some(Self {
filter: maybe_filter,
filter,
column_id: column_metadata.column_id,
semantic_type: column_metadata.semantic_type,
data_type: column_metadata.column_schema.data_type.clone(),
@@ -1088,7 +1055,7 @@ impl SimpleFilterContext {
}
/// Returns the filter to evaluate.
pub(crate) fn filter(&self) -> &MaybeFilter {
pub(crate) fn filter(&self) -> &SimpleFilterEvaluator {
&self.filter
}
@@ -1108,17 +1075,6 @@ impl SimpleFilterContext {
}
}
/// Prune a column by its default value.
/// Returns `None` if we can't create the default value or evaluate the filter.
fn pruned_by_default(filter: &SimpleFilterEvaluator, column: &ColumnMetadata) -> Option<bool> {
let value = column.column_schema.create_default().ok().flatten()?;
let scalar_value = value
.try_to_scalar_value(&column.column_schema.data_type)
.ok()?;
let matches = filter.evaluate_scalar(&scalar_value).ok()?;
Some(!matches)
}
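Taken together, `MaybeFilter` and `pruned_by_default` implement a small tri-state decision for columns that are missing from the SST; a sketch with the evaluator reduced to a closure and the default value to an `Option`:

enum MaybeFilter<F> {
    Filter(F), // the column exists in the SST: evaluate the filter per batch
    Matched,   // the column is missing but its default satisfies the filter: keep all rows
    Pruned,    // the column is missing and its default fails the filter: skip the whole range
}

// Returns None when no default can be produced or the filter can't be evaluated,
// in which case the caller simply ignores this filter.
fn classify<F>(
    filter: F,
    column_in_sst: bool,
    default_value: Option<i64>,
    matches: impl Fn(&F, i64) -> bool,
) -> Option<MaybeFilter<F>> {
    if column_in_sst {
        return Some(MaybeFilter::Filter(filter));
    }
    let value = default_value?;
    if matches(&filter, value) {
        Some(MaybeFilter::Matched)
    } else {
        Some(MaybeFilter::Pruned)
    }
}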
type RowGroupMap = BTreeMap<usize, Option<RowSelection>>;
/// Parquet batch reader to read our SST format.

View File

@@ -16,11 +16,10 @@
use std::borrow::Borrow;
use std::collections::HashSet;
use std::sync::Arc;
use datafusion::physical_optimizer::pruning::PruningStatistics;
use datafusion_common::{Column, ScalarValue};
use datatypes::arrow::array::{ArrayRef, BooleanArray, UInt64Array};
use datatypes::arrow::array::{ArrayRef, BooleanArray};
use parquet::file::metadata::RowGroupMetaData;
use store_api::metadata::RegionMetadataRef;
use store_api::storage::ColumnId;
@@ -55,62 +54,25 @@ impl<'a, T> RowGroupPruningStats<'a, T> {
}
/// Returns the column id of a specific column name if we need to read it.
/// Prefers the column id in the expected metadata if it exists.
fn column_id_to_prune(&self, name: &str) -> Option<ColumnId> {
let metadata = self
.expected_metadata
.as_ref()
.unwrap_or_else(|| self.read_format.metadata());
// Only use stats when the column to read has the same id as the column in the SST.
metadata.column_by_name(name).map(|col| col.column_id)
}
/// Returns the default value of all row groups for `column` according to the metadata.
fn compat_default_value(&self, column: &str) -> Option<ArrayRef> {
let metadata = self.expected_metadata.as_ref()?;
let col_metadata = metadata.column_by_name(column)?;
col_metadata
.column_schema
.create_default_vector(self.row_groups.len())
.unwrap_or(None)
.map(|vector| vector.to_arrow_array())
}
}
impl<T: Borrow<RowGroupMetaData>> RowGroupPruningStats<'_, T> {
/// Returns the null count of all row groups for `column` according to the metadata.
fn compat_null_count(&self, column: &str) -> Option<ArrayRef> {
let metadata = self.expected_metadata.as_ref()?;
let col_metadata = metadata.column_by_name(column)?;
let value = col_metadata
.column_schema
.create_default()
.unwrap_or(None)?;
let values = self.row_groups.iter().map(|meta| {
if value.is_null() {
u64::try_from(meta.borrow().num_rows()).ok()
} else {
Some(0)
}
});
Some(Arc::new(UInt64Array::from_iter(values)))
}
}
impl<T: Borrow<RowGroupMetaData>> PruningStatistics for RowGroupPruningStats<'_, T> {
fn min_values(&self, column: &Column) -> Option<ArrayRef> {
let column_id = self.column_id_to_prune(&column.name)?;
match self.read_format.min_values(self.row_groups, column_id) {
Some(values) => Some(values),
None => self.compat_default_value(&column.name),
}
self.read_format.min_values(self.row_groups, column_id)
}
fn max_values(&self, column: &Column) -> Option<ArrayRef> {
let column_id = self.column_id_to_prune(&column.name)?;
match self.read_format.max_values(self.row_groups, column_id) {
Some(values) => Some(values),
None => self.compat_default_value(&column.name),
}
self.read_format.max_values(self.row_groups, column_id)
}
fn num_containers(&self) -> usize {
@@ -118,9 +80,7 @@ impl<T: Borrow<RowGroupMetaData>> PruningStatistics for RowGroupPruningStats<'_,
}
fn null_counts(&self, column: &Column) -> Option<ArrayRef> {
let Some(column_id) = self.column_id_to_prune(&column.name) else {
return self.compat_null_count(&column.name);
};
let column_id = self.column_id_to_prune(&column.name)?;
self.read_format.null_counts(self.row_groups, column_id)
}
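The removed `compat_null_count` fallback boils down to a per-row-group rule that is easy to restate without the Arrow types: a column that exists only in the expected metadata reports all rows as null when its default is NULL, and zero nulls otherwise.

// Dependency-free sketch of the null-count fallback (the real code builds a UInt64Array).
fn null_counts_from_default(default_is_null: bool, rows_per_group: &[u64]) -> Vec<u64> {
    rows_per_group
        .iter()
        .map(|rows| if default_is_null { *rows } else { 0 })
        .collect()
}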

View File

@@ -20,6 +20,7 @@ pub mod error;
pub mod expr_helper;
pub mod flow;
pub mod insert;
pub mod metadata;
pub mod metrics;
pub mod procedure;
pub mod region_req_factory;

View File

@@ -0,0 +1,53 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_function::handlers::MetadataSnapshotHandler;
use common_meta::snapshot::MetadataSnapshotManager;
use common_query::error as query_error;
use common_query::error::Result as QueryResult;
use snafu::ResultExt;
/// The operator of the metadata snapshot.
pub struct MetadataSnapshotOperator {
operator: MetadataSnapshotManager,
}
impl MetadataSnapshotOperator {
pub fn new(operator: MetadataSnapshotManager) -> Self {
Self { operator }
}
}
#[async_trait]
impl MetadataSnapshotHandler for MetadataSnapshotOperator {
async fn dump(&self, path: &str, filename: &str) -> QueryResult<String> {
self.operator
.dump(path, filename)
.await
.map_err(BoxedError::new)
.map(|(file, _)| file)
.context(query_error::MetadataSnapshotSnafu)
}
async fn restore(&self, path: &str, filename: &str) -> QueryResult<u64> {
let filepath = format!("{}{}", path, filename);
self.operator
.restore(&filepath)
.await
.map_err(BoxedError::new)
.context(query_error::MetadataSnapshotSnafu)
}
}
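A hypothetical call site for the handler implemented above (the backup directory and file name are made up for illustration; note that `restore` joins `path` and `filename` by plain concatenation, so the path should end with a separator):

use common_function::handlers::MetadataSnapshotHandlerRef;

async fn backup_then_restore(handler: MetadataSnapshotHandlerRef) -> common_query::error::Result<u64> {
    // `dump` returns the name of the written snapshot file.
    let _file = handler.dump("backups/", "metadata.snapshot").await?;
    // `restore` returns a `u64` count; what it counts is not shown in this diff.
    handler.restore("backups/", "metadata.snapshot").await
}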

View File

@@ -26,7 +26,6 @@ use common_catalog::consts::{is_readonly_schema, DEFAULT_CATALOG_NAME, DEFAULT_S
use common_catalog::{format_full_flow_name, format_full_table_name};
use common_error::ext::BoxedError;
use common_meta::cache_invalidator::Context;
use common_meta::ddl::create_flow::FlowType;
use common_meta::ddl::ExecutorContext;
use common_meta::instruction::CacheIdent;
use common_meta::key::schema_name::{SchemaName, SchemaNameKey};
@@ -39,8 +38,6 @@ use common_meta::rpc::router::{Partition, Partition as MetaPartition};
use common_query::Output;
use common_telemetry::{debug, info, tracing};
use common_time::Timezone;
use datafusion_common::tree_node::TreeNodeVisitor;
use datafusion_expr::LogicalPlan;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{RawSchema, Schema};
use datatypes::value::Value;
@@ -48,7 +45,7 @@ use lazy_static::lazy_static;
use partition::expr::{Operand, PartitionExpr, RestrictedOp};
use partition::multi_dim::MultiDimPartitionRule;
use partition::partition::{PartitionBound, PartitionDef};
use query::parser::{QueryLanguageParser, QueryStatement};
use query::parser::QueryStatement;
use query::plan::extract_and_rewrite_full_table_names;
use query::query_engine::DefaultSerializer;
use query::sql::create_table_stmt;
@@ -72,14 +69,13 @@ use table::table_name::TableName;
use table::TableRef;
use crate::error::{
self, AlterExprToRequestSnafu, BuildDfLogicalPlanSnafu, CatalogSnafu, ColumnDataTypeSnafu,
ColumnNotFoundSnafu, ConvertSchemaSnafu, CreateLogicalTablesSnafu, CreateTableInfoSnafu,
DeserializePartitionSnafu, EmptyDdlExprSnafu, ExternalSnafu, ExtractTableNamesSnafu,
FlowNotFoundSnafu, InvalidPartitionRuleSnafu, InvalidPartitionSnafu, InvalidSqlSnafu,
InvalidTableNameSnafu, InvalidViewNameSnafu, InvalidViewStmtSnafu, ParseSqlValueSnafu, Result,
SchemaInUseSnafu, SchemaNotFoundSnafu, SchemaReadOnlySnafu, SubstraitCodecSnafu,
TableAlreadyExistsSnafu, TableMetadataManagerSnafu, TableNotFoundSnafu,
UnrecognizedTableOptionSnafu, ViewAlreadyExistsSnafu,
self, AlterExprToRequestSnafu, CatalogSnafu, ColumnDataTypeSnafu, ColumnNotFoundSnafu,
ConvertSchemaSnafu, CreateLogicalTablesSnafu, CreateTableInfoSnafu, DeserializePartitionSnafu,
EmptyDdlExprSnafu, ExtractTableNamesSnafu, FlowNotFoundSnafu, InvalidPartitionRuleSnafu,
InvalidPartitionSnafu, InvalidSqlSnafu, InvalidTableNameSnafu, InvalidViewNameSnafu,
InvalidViewStmtSnafu, ParseSqlValueSnafu, Result, SchemaInUseSnafu, SchemaNotFoundSnafu,
SchemaReadOnlySnafu, SubstraitCodecSnafu, TableAlreadyExistsSnafu, TableMetadataManagerSnafu,
TableNotFoundSnafu, UnrecognizedTableOptionSnafu, ViewAlreadyExistsSnafu,
};
use crate::expr_helper;
use crate::statement::show::create_partitions_stmt;
@@ -368,18 +364,6 @@ impl StatementExecutor {
expr: CreateFlowExpr,
query_context: QueryContextRef,
) -> Result<SubmitDdlTaskResponse> {
let flow_type = self
.determine_flow_type(&expr.sql, query_context.clone())
.await?;
info!("determined flow={} type: {:#?}", expr.flow_name, flow_type);
let expr = {
let mut expr = expr;
expr.flow_options
.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type.to_string());
expr
};
let task = CreateFlowTask::try_from(PbCreateFlowTask {
create_flow: Some(expr),
})
@@ -395,55 +379,6 @@ impl StatementExecutor {
.context(error::ExecuteDdlSnafu)
}
/// Determine the flow type based on the SQL query
///
/// If it contains aggregation or distinct, then it is a batch flow, otherwise it is a streaming flow
async fn determine_flow_type(&self, sql: &str, query_ctx: QueryContextRef) -> Result<FlowType> {
let engine = &self.query_engine;
let stmt = QueryLanguageParser::parse_sql(sql, &query_ctx)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let plan = engine
.planner()
.plan(&stmt, query_ctx)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
/// Visitor to find aggregation or distinct
struct FindAggr {
is_aggr: bool,
}
impl TreeNodeVisitor<'_> for FindAggr {
type Node = LogicalPlan;
fn f_down(
&mut self,
node: &Self::Node,
) -> datafusion_common::Result<datafusion_common::tree_node::TreeNodeRecursion>
{
match node {
LogicalPlan::Aggregate(_) | LogicalPlan::Distinct(_) => {
self.is_aggr = true;
return Ok(datafusion_common::tree_node::TreeNodeRecursion::Stop);
}
_ => (),
}
Ok(datafusion_common::tree_node::TreeNodeRecursion::Continue)
}
}
let mut find_aggr = FindAggr { is_aggr: false };
plan.visit_with_subqueries(&mut find_aggr)
.context(BuildDfLogicalPlanSnafu)?;
if find_aggr.is_aggr {
Ok(FlowType::Batching)
} else {
Ok(FlowType::Streaming)
}
}
#[tracing::instrument(skip_all)]
pub async fn create_view(
&self,

View File

@@ -25,7 +25,6 @@ use api::v1::{ColumnDataType, ColumnDataTypeExtension, JsonTypeExtension, Semant
use coerce::{coerce_columns, coerce_value};
use greptime_proto::v1::{ColumnSchema, Row, Rows, Value as GreptimeValue};
use itertools::Itertools;
use once_cell::sync::OnceCell;
use serde_json::Number;
use crate::error::{
@@ -55,12 +54,8 @@ pub struct GreptimeTransformer {
/// Parameters that can be used to configure the greptime pipelines.
#[derive(Debug, Clone, Default)]
pub struct GreptimePipelineParams {
/// The original options for configuring the greptime pipelines.
/// This should not be used directly; instead, use the parsed shortcut option values.
options: HashMap<String, String>,
/// Parsed shortcut option values
pub flatten_json_object: OnceCell<bool>,
/// The options for configuring the greptime pipelines.
pub options: HashMap<String, String>,
}
impl GreptimePipelineParams {
@@ -75,20 +70,15 @@ impl GreptimePipelineParams {
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect::<HashMap<String, String>>();
Self {
options,
flatten_json_object: OnceCell::new(),
}
Self { options }
}
/// Whether to flatten the JSON object.
pub fn flatten_json_object(&self) -> bool {
*self.flatten_json_object.get_or_init(|| {
self.options
.get("flatten_json_object")
.map(|v| v == "true")
.unwrap_or(false)
})
self.options
.get("flatten_json_object")
.map(|v| v == "true")
.unwrap_or(false)
}
}
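For context, a sketch of how such an options string could be turned into the map consumed above (the `k1=v1,k2=v2` wire format is assumed here; the crate's actual constructor is not shown in this diff):

use std::collections::HashMap;

// Parse e.g. "flatten_json_object=true,other=x" into a String -> String map.
fn parse_pipeline_params(params: &str) -> HashMap<String, String> {
    params
        .split(',')
        .filter_map(|kv| kv.split_once('='))
        .map(|(k, v)| (k.to_string(), v.to_string()))
        .collect()
}

fn flatten_json_object(options: &HashMap<String, String>) -> bool {
    options
        .get("flatten_json_object")
        .map(|v| v == "true")
        .unwrap_or(false)
}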

View File

@@ -436,8 +436,7 @@ fn coerce_string_value(s: &String, transform: &Transform) -> Result<Option<Value
None => CoerceUnsupportedEpochTypeSnafu { ty: "String" }.fail(),
},
Value::Array(_) | Value::Map(_) => CoerceStringToTypeSnafu {
s,
Value::Array(_) | Value::Map(_) => CoerceJsonTypeToSnafu {
ty: transform.type_.to_str_type(),
}
.fail(),

View File

@@ -588,6 +588,7 @@ mod tests {
None,
None,
None,
None,
false,
QueryOptions::default(),
)

View File

@@ -295,6 +295,7 @@ mod tests {
None,
None,
None,
None,
false,
Default::default(),
)

View File

@@ -25,7 +25,8 @@ use common_base::Plugins;
use common_function::function::FunctionRef;
use common_function::function_registry::FUNCTION_REGISTRY;
use common_function::handlers::{
FlowServiceHandlerRef, ProcedureServiceHandlerRef, TableMutationHandlerRef,
FlowServiceHandlerRef, MetadataSnapshotHandlerRef, ProcedureServiceHandlerRef,
TableMutationHandlerRef,
};
use common_function::scalars::aggregate::AggregateFunctionMetaRef;
use common_query::Output;
@@ -100,12 +101,14 @@ pub struct QueryEngineFactory {
}
impl QueryEngineFactory {
#[allow(clippy::too_many_arguments)]
pub fn new(
catalog_manager: CatalogManagerRef,
region_query_handler: Option<RegionQueryHandlerRef>,
table_mutation_handler: Option<TableMutationHandlerRef>,
procedure_service_handler: Option<ProcedureServiceHandlerRef>,
flow_service_handler: Option<FlowServiceHandlerRef>,
metadata_snapshot_handler: Option<MetadataSnapshotHandlerRef>,
with_dist_planner: bool,
options: QueryOptions,
) -> Self {
@@ -115,6 +118,7 @@ impl QueryEngineFactory {
table_mutation_handler,
procedure_service_handler,
flow_service_handler,
metadata_snapshot_handler,
with_dist_planner,
Default::default(),
options,
@@ -128,6 +132,7 @@ impl QueryEngineFactory {
table_mutation_handler: Option<TableMutationHandlerRef>,
procedure_service_handler: Option<ProcedureServiceHandlerRef>,
flow_service_handler: Option<FlowServiceHandlerRef>,
metadata_snapshot_handler: Option<MetadataSnapshotHandlerRef>,
with_dist_planner: bool,
plugins: Plugins,
options: QueryOptions,
@@ -138,6 +143,7 @@ impl QueryEngineFactory {
table_mutation_handler,
procedure_service_handler,
flow_service_handler,
metadata_snapshot_handler,
with_dist_planner,
plugins.clone(),
options,
@@ -178,6 +184,7 @@ mod tests {
None,
None,
None,
None,
false,
QueryOptions::default(),
);

View File
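The extra None threaded through the test call sites in this diff corresponds to the new metadata_snapshot_handler: Option<MetadataSnapshotHandlerRef> parameter, which sits between flow_service_handler and with_dist_planner in QueryEngineFactory::new.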

@@ -84,6 +84,7 @@ impl QueryEngineContext {
None,
None,
None,
None,
false,
Plugins::default(),
QueryOptions::default(),

View File

@@ -184,6 +184,7 @@ mod tests {
None,
None,
None,
None,
false,
QueryOptions::default(),
);

View File

@@ -21,7 +21,8 @@ use catalog::CatalogManagerRef;
use common_base::Plugins;
use common_function::function::FunctionRef;
use common_function::handlers::{
FlowServiceHandlerRef, ProcedureServiceHandlerRef, TableMutationHandlerRef,
FlowServiceHandlerRef, MetadataSnapshotHandlerRef, ProcedureServiceHandlerRef,
TableMutationHandlerRef,
};
use common_function::scalars::aggregate::AggregateFunctionMetaRef;
use common_function::state::FunctionState;
@@ -91,6 +92,7 @@ impl QueryEngineState {
table_mutation_handler: Option<TableMutationHandlerRef>,
procedure_service_handler: Option<ProcedureServiceHandlerRef>,
flow_service_handler: Option<FlowServiceHandlerRef>,
metadata_snapshot_handler: Option<MetadataSnapshotHandlerRef>,
with_dist_planner: bool,
plugins: Plugins,
options: QueryOptionsNew,
@@ -181,6 +183,7 @@ impl QueryEngineState {
table_mutation_handler,
procedure_service_handler,
flow_service_handler,
metadata_snapshot_handler,
}),
aggregate_functions: Arc::new(RwLock::new(HashMap::new())),
extension_rules,

View File

@@ -670,6 +670,7 @@ mod test {
None,
None,
None,
None,
false,
QueryOptions::default(),
)

View File

@@ -53,6 +53,7 @@ pub fn new_query_engine_with_table(table: TableRef) -> QueryEngineRef {
None,
None,
None,
None,
false,
QueryOptions::default(),
)

View File

@@ -50,6 +50,7 @@ async fn test_datafusion_query_engine() -> Result<()> {
None,
None,
None,
None,
false,
QueryOptionsNew::default(),
);
@@ -137,6 +138,7 @@ async fn test_query_validate() -> Result<()> {
None,
None,
None,
None,
false,
plugins,
QueryOptionsNew::default(),

View File

@@ -109,6 +109,7 @@ fn create_test_engine() -> TimeRangeTester {
None,
None,
None,
None,
false,
QueryOptions::default(),
)

View File

@@ -3156,8 +3156,7 @@ mod test {
let fetch_bound = 100;
let mut rng = fastrand::Rng::new();
let rng_seed = rng.u64(..);
rng.seed(rng_seed);
rng.seed(1337);
let mut bound_val = None;
// construct testcases
type CmpFn<T> = Box<dyn FnMut(&T, &T) -> std::cmp::Ordering>;
@@ -3300,8 +3299,8 @@ mod test {
}
assert_eq!(
res_concat, expected_concat,
"case failed, case id: {}, rng seed: {}",
case_id, rng_seed
"case failed, case id: {}",
case_id
);
}
}
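The test above swaps the run-dependent seed for a fixed one so that a failure reproduces identically on every run, at the cost of no longer exploring new seeds. A minimal sketch of that pattern with fastrand (the seed and ranges are illustrative):

// Deterministic pseudo-randomness for a test: a fixed seed makes every run
// produce the same sequence, so a failing case can be replayed exactly.
fn main() {
    let mut rng = fastrand::Rng::new();
    rng.seed(1337);
    let a: Vec<u64> = (0..5).map(|_| rng.u64(..100)).collect();

    // Re-seeding restores the sequence from the start.
    rng.seed(1337);
    let b: Vec<u64> = (0..5).map(|_| rng.u64(..100)).collect();

    assert_eq!(a, b);
}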

Some files were not shown because too many files have changed in this diff