Compare commits

..

14 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Ruihang Xia | f3a02effa7 | assign partition_ranges<br/>Signed-off-by: Ruihang Xia <waynestxia@gmail.com> | 2025-04-08 21:36:29 +08:00 |
| Ruihang Xia | 52f9fc25ba | Revert "feat: keep parallelize_scan unchanged"<br/>This reverts commit 96ba00d175. | 2025-04-08 21:16:09 +08:00 |
| evenyag | 214a16565a | chore: update comment | 2025-04-08 21:00:25 +08:00 |
| evenyag | 21790a607e | feat: use smallvec | 2025-04-08 20:54:34 +08:00 |
| evenyag | b33d8c1bad | fix: include build merge reader cost to scan cost | 2025-04-08 20:52:19 +08:00 |
| evenyag | 916e1c2d9e | fix: address compiler errors | 2025-04-08 20:51:55 +08:00 |
| evenyag | 96ba00d175 | feat: keep parallelize_scan unchanged | 2025-04-08 20:36:48 +08:00 |
| evenyag | 7173401732 | fix: use series scan in PerSeries distribution | 2025-04-08 20:34:08 +08:00 |
| evenyag | 17c797a6d0 | refactor: remove per series scan from SeqScan | 2025-04-08 20:34:06 +08:00 |
| evenyag | c44ba1aa69 | feat: parallelize PerSeries | 2025-04-08 20:26:50 +08:00 |
| evenyag | 843d33f9d0 | feat: use series scan when distribution is PerSeries | 2025-04-08 20:26:50 +08:00 |
| evenyag | b74e2a7d9b | feat: implement scan logic of each partition | 2025-04-08 20:26:47 +08:00 |
| evenyag | 4a79c1527d | chore: add to scanner enum | 2025-04-08 20:24:54 +08:00 |
| evenyag | b7a6ff9cc3 | chore: basic methods for SeriesScan | 2025-04-08 20:24:54 +08:00 |
576 changed files with 17463 additions and 36427 deletions

.coderabbit.yaml (new file, 15 lines added)

@@ -0,0 +1,15 @@
# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
language: "en-US"
early_access: false
reviews:
profile: "chill"
request_changes_workflow: false
high_level_summary: true
poem: true
review_status: true
collapse_walkthrough: false
auto_review:
enabled: false
drafts: false
chat:
auto_reply: true


@@ -2,14 +2,13 @@ meta:
configData: |- configData: |-
[runtime] [runtime]
global_rt_size = 4 global_rt_size = 4
[wal] [wal]
provider = "kafka" provider = "kafka"
broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"] broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
num_topics = 3 num_topics = 3
auto_prune_interval = "30s"
trigger_flush_threshold = 100
[datanode] [datanode]
[datanode.client] [datanode.client]
timeout = "120s" timeout = "120s"
@@ -23,7 +22,6 @@ datanode:
provider = "kafka" provider = "kafka"
broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"] broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
linger = "2ms" linger = "2ms"
overwrite_entry_start_id = true
frontend: frontend:
configData: |- configData: |-
[runtime] [runtime]


@@ -25,7 +25,7 @@ function create_version() {
fi fi
# Reuse $NEXT_RELEASE_VERSION to identify whether it's a nightly build. # Reuse $NEXT_RELEASE_VERSION to identify whether it's a nightly build.
# It will be like 'nightly-20230808-7d0d8dc6'. # It will be like 'nigtly-20230808-7d0d8dc6'.
if [ "$NEXT_RELEASE_VERSION" = nightly ]; then if [ "$NEXT_RELEASE_VERSION" = nightly ]; then
echo "$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")-$(git rev-parse --short HEAD)" echo "$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")-$(git rev-parse --short HEAD)"
exit 0 exit 0
@@ -60,9 +60,9 @@ function create_version() {
} }
# You can run as following examples: # You can run as following examples:
# GITHUB_EVENT_NAME=push NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly GITHUB_REF_NAME=v0.3.0 ./create-version.sh # GITHUB_EVENT_NAME=push NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly GITHUB_REF_NAME=v0.3.0 ./create-version.sh
# GITHUB_EVENT_NAME=workflow_dispatch NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh # GITHUB_EVENT_NAME=workflow_dispatch NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
# GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh # GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
# GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=nightly NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh # GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=nightly NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
# GITHUB_EVENT_NAME=workflow_dispatch COMMIT_SHA=f0e7216c4bb6acce9b29a21ec2d683be2e3f984a NEXT_RELEASE_VERSION=dev NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh # GITHUB_EVENT_NAME=workflow_dispatch COMMIT_SHA=f0e7216c4bb6acce9b29a21ec2d683be2e3f984a NEXT_RELEASE_VERSION=dev NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
create_version create_version


@@ -21,6 +21,32 @@ jobs:
run: sudo apt-get install -y jq run: sudo apt-get install -y jq
# Make the check.sh script executable # Make the check.sh script executable
- name: Check grafana dashboards - name: Make check.sh executable
run: chmod +x grafana/check.sh
# Run the check.sh script
- name: Run check.sh
run: ./grafana/check.sh
# Only run summary.sh for pull_request events (not for merge queues or final pushes)
- name: Check if this is a pull request
id: check-pr
run: | run: |
make check-dashboards if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "is_pull_request=true" >> $GITHUB_OUTPUT
else
echo "is_pull_request=false" >> $GITHUB_OUTPUT
fi
# Make the summary.sh script executable
- name: Make summary.sh executable
if: steps.check-pr.outputs.is_pull_request == 'true'
run: chmod +x grafana/summary.sh
# Run the summary.sh script and add its output to the GitHub Job Summary
- name: Run summary.sh and add to Job Summary
if: steps.check-pr.outputs.is_pull_request == 'true'
run: |
SUMMARY=$(./grafana/summary.sh)
echo "### Summary of Grafana Panels" >> $GITHUB_STEP_SUMMARY
echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY


@@ -317,7 +317,7 @@ jobs:
image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }} image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }} image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }} push-latest-tag: true
- name: Set build image result - name: Set build image result
id: set-build-image-result id: set-build-image-result
@@ -364,7 +364,7 @@ jobs:
dev-mode: false dev-mode: false
upload-to-s3: true upload-to-s3: true
update-version-info: true update-version-info: true
push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }} push-latest-tag: true
publish-github-release: publish-github-release:
name: Create GitHub release and upload artifacts name: Create GitHub release and upload artifacts

Cargo.lock (generated, 1373 changed lines)

File diff suppressed because it is too large.


@@ -77,6 +77,7 @@ clippy.print_stdout = "warn"
clippy.print_stderr = "warn" clippy.print_stderr = "warn"
clippy.dbg_macro = "warn" clippy.dbg_macro = "warn"
clippy.implicit_clone = "warn" clippy.implicit_clone = "warn"
clippy.readonly_write_lock = "allow"
rust.unknown_lints = "deny" rust.unknown_lints = "deny"
rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] } rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
@@ -89,11 +90,11 @@ rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
# See for more detaiils: https://github.com/rust-lang/cargo/issues/11329 # See for more detaiils: https://github.com/rust-lang/cargo/issues/11329
ahash = { version = "0.8", features = ["compile-time-rng"] } ahash = { version = "0.8", features = ["compile-time-rng"] }
aquamarine = "0.6" aquamarine = "0.6"
arrow = { version = "54.2", features = ["prettyprint"] } arrow = { version = "53.0.0", features = ["prettyprint"] }
arrow-array = { version = "54.2", default-features = false, features = ["chrono-tz"] } arrow-array = { version = "53.0.0", default-features = false, features = ["chrono-tz"] }
arrow-flight = "54.2" arrow-flight = "53.0"
arrow-ipc = { version = "54.2", default-features = false, features = ["lz4", "zstd"] } arrow-ipc = { version = "53.0.0", default-features = false, features = ["lz4", "zstd"] }
arrow-schema = { version = "54.2", features = ["serde"] } arrow-schema = { version = "53.0", features = ["serde"] }
async-stream = "0.3" async-stream = "0.3"
async-trait = "0.1" async-trait = "0.1"
# Remember to update axum-extra, axum-macros when updating axum # Remember to update axum-extra, axum-macros when updating axum
@@ -112,15 +113,15 @@ clap = { version = "4.4", features = ["derive"] }
config = "0.13.0" config = "0.13.0"
crossbeam-utils = "0.8" crossbeam-utils = "0.8"
dashmap = "6.1" dashmap = "6.1"
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-optimizer = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-sql = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
deadpool = "0.12" deadpool = "0.12"
deadpool-postgres = "0.14" deadpool-postgres = "0.14"
derive_builder = "0.20" derive_builder = "0.20"
@@ -129,7 +130,7 @@ etcd-client = "0.14"
fst = "0.4.7" fst = "0.4.7"
futures = "0.3" futures = "0.3"
futures-util = "0.3" futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "e82b0158cd38d4021edb4e4c0ae77f999051e62f" } greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "dd4a1996982534636734674db66e44464b0c0d83" }
hex = "0.4" hex = "0.4"
http = "1" http = "1"
humantime = "2.1" humantime = "2.1"
@@ -147,7 +148,6 @@ moka = "0.12"
nalgebra = "0.33" nalgebra = "0.33"
notify = "8.0" notify = "8.0"
num_cpus = "1.16" num_cpus = "1.16"
object_store_opendal = "0.50"
once_cell = "1.18" once_cell = "1.18"
opentelemetry-proto = { version = "0.27", features = [ opentelemetry-proto = { version = "0.27", features = [
"gen-tonic", "gen-tonic",
@@ -157,11 +157,11 @@ opentelemetry-proto = { version = "0.27", features = [
"logs", "logs",
] } ] }
parking_lot = "0.12" parking_lot = "0.12"
parquet = { version = "54.2", default-features = false, features = ["arrow", "async", "object_store"] } parquet = { version = "53.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0" paste = "1.0"
pin-project = "1.0" pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] } prometheus = { version = "0.13.3", features = ["process"] }
promql-parser = { version = "0.5.1", features = ["ser"] } promql-parser = { version = "0.5", features = ["ser"] }
prost = "0.13" prost = "0.13"
raft-engine = { version = "0.4.1", default-features = false } raft-engine = { version = "0.4.1", default-features = false }
rand = "0.9" rand = "0.9"
@@ -191,18 +191,19 @@ simd-json = "0.15"
similar-asserts = "1.6.0" similar-asserts = "1.6.0"
smallvec = { version = "1", features = ["serde"] } smallvec = { version = "1", features = ["serde"] }
snafu = "0.8" snafu = "0.8"
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "0cf6c04490d59435ee965edd2078e8855bd8471e", features = [
"visitor",
"serde",
] } # branch = "v0.54.x"
sqlx = { version = "0.8", features = [ sqlx = { version = "0.8", features = [
"runtime-tokio-rustls", "runtime-tokio-rustls",
"mysql", "mysql",
"postgres", "postgres",
"chrono", "chrono",
] } ] }
strum = { version = "0.27", features = ["derive"] }
sysinfo = "0.33" sysinfo = "0.33"
# on branch v0.52.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "71dd86058d2af97b9925093d40c4e03360403170", features = [
"visitor",
"serde",
] } # on branch v0.44.x
strum = { version = "0.27", features = ["derive"] }
tempfile = "3" tempfile = "3"
tokio = { version = "1.40", features = ["full"] } tokio = { version = "1.40", features = ["full"] }
tokio-postgres = "0.7" tokio-postgres = "0.7"
@@ -269,9 +270,6 @@ metric-engine = { path = "src/metric-engine" }
mito2 = { path = "src/mito2" } mito2 = { path = "src/mito2" }
object-store = { path = "src/object-store" } object-store = { path = "src/object-store" }
operator = { path = "src/operator" } operator = { path = "src/operator" }
otel-arrow-rust = { git = "https://github.com/open-telemetry/otel-arrow", rev = "5d551412d2a12e689cde4d84c14ef29e36784e51", features = [
"server",
] }
partition = { path = "src/partition" } partition = { path = "src/partition" }
pipeline = { path = "src/pipeline" } pipeline = { path = "src/pipeline" }
plugins = { path = "src/plugins" } plugins = { path = "src/plugins" }


@@ -32,10 +32,6 @@ ifneq ($(strip $(BUILD_JOBS)),)
NEXTEST_OPTS += --build-jobs=${BUILD_JOBS} NEXTEST_OPTS += --build-jobs=${BUILD_JOBS}
endif endif
ifneq ($(strip $(BUILD_JOBS)),)
SQLNESS_OPTS += --jobs ${BUILD_JOBS}
endif
ifneq ($(strip $(CARGO_PROFILE)),) ifneq ($(strip $(CARGO_PROFILE)),)
CARGO_BUILD_OPTS += --profile ${CARGO_PROFILE} CARGO_BUILD_OPTS += --profile ${CARGO_PROFILE}
endif endif
@@ -197,7 +193,6 @@ fix-clippy: ## Fix clippy violations.
fmt-check: ## Check code format. fmt-check: ## Check code format.
cargo fmt --all -- --check cargo fmt --all -- --check
python3 scripts/check-snafu.py python3 scripts/check-snafu.py
python3 scripts/check-super-imports.py
.PHONY: start-etcd .PHONY: start-etcd
start-etcd: ## Start single node etcd for testing purpose. start-etcd: ## Start single node etcd for testing purpose.
@@ -222,16 +217,6 @@ start-cluster: ## Start the greptimedb cluster with etcd by using docker compose
stop-cluster: ## Stop the greptimedb cluster that created by docker compose. stop-cluster: ## Stop the greptimedb cluster that created by docker compose.
docker compose -f ./docker/docker-compose/cluster-with-etcd.yaml stop docker compose -f ./docker/docker-compose/cluster-with-etcd.yaml stop
##@ Grafana
.PHONY: check-dashboards
check-dashboards: ## Check the Grafana dashboards.
@./grafana/scripts/check.sh
.PHONY: dashboards
dashboards: ## Generate the Grafana dashboards for standalone mode and intermediate dashboards.
@./grafana/scripts/gen-dashboards.sh
##@ Docs ##@ Docs
config-docs: ## Generate configuration documentation from toml files. config-docs: ## Generate configuration documentation from toml files.
docker run --rm \ docker run --rm \


@@ -6,7 +6,7 @@
</picture> </picture>
</p> </p>
<h2 align="center">Real-Time & Cloud-Native Observability Database<br/>for metrics, logs, and traces</h2> <h2 align="center">Unified & Cost-Effective Observerability Database for Metrics, Logs, and Events</h2>
<div align="center"> <div align="center">
<h3 align="center"> <h3 align="center">
@@ -62,7 +62,7 @@
## Introduction ## Introduction
**GreptimeDB** is an open-source, cloud-native, unified & cost-effective observability database for **Metrics**, **Logs**, and **Traces**. You can gain real-time insights from Edge to Cloud at Any Scale. **GreptimeDB** is an open-source unified & cost-effective observerability database for **Metrics**, **Logs**, and **Events** (also **Traces** in plan). You can gain real-time insights from Edge to Cloud at Any Scale.
## News ## News
@@ -70,27 +70,27 @@
## Why GreptimeDB ## Why GreptimeDB
Our core developers have been building observability data platforms for years. Based on our best practices, GreptimeDB was born to give you: Our core developers have been building observerability data platforms for years. Based on our best practices, GreptimeDB was born to give you:
* **Unified Processing of Observability Data** * **Unified Processing of Metrics, Logs, and Events**
A unified database that treats metrics, logs, and traces as timestamped wide events with context, supporting [SQL](https://docs.greptime.com/user-guide/query-data/sql)/[PromQL](https://docs.greptime.com/user-guide/query-data/promql) queries and [stream processing](https://docs.greptime.com/user-guide/flow-computation/overview) to simplify complex data stacks. GreptimeDB unifies observerability data processing by treating all data - whether metrics, logs, or events - as timestamped events with context. Users can analyze this data using either [SQL](https://docs.greptime.com/user-guide/query-data/sql) or [PromQL](https://docs.greptime.com/user-guide/query-data/promql) and leverage stream processing ([Flow](https://docs.greptime.com/user-guide/flow-computation/overview)) to enable continuous aggregation. [Read more](https://docs.greptime.com/user-guide/concepts/data-model).
* **High Performance and Cost-effective**
Written in Rust, combines a distributed query engine with [rich indexing](https://docs.greptime.com/user-guide/manage-data/data-index) (inverted, fulltext, skip data, and vector) and optimized columnar storage to deliver sub-second responses on petabyte-scale data and high-cost efficiency.
* **Cloud-native Distributed Database** * **Cloud-native Distributed Database**
Built for [Kubernetes](https://docs.greptime.com/user-guide/deployments/deploy-on-kubernetes/greptimedb-operator-management). GreptimeDB achieves seamless scalability with its [cloud-native architecture](https://docs.greptime.com/user-guide/concepts/architecture) of separated compute and storage, built on object storage (AWS S3, Azure Blob Storage, etc.) while enabling cross-cloud deployment through a unified data access layer. Built for [Kubernetes](https://docs.greptime.com/user-guide/deployments/deploy-on-kubernetes/greptimedb-operator-management). GreptimeDB achieves seamless scalability with its [cloud-native architecture](https://docs.greptime.com/user-guide/concepts/architecture) of separated compute and storage, built on object storage (AWS S3, Azure Blob Storage, etc.) while enabling cross-cloud deployment through a unified data access layer.
* **Developer-Friendly** * **Performance and Cost-effective**
Access standardized SQL/PromQL interfaces through built-in web dashboard, REST API, and MySQL/PostgreSQL protocols. Supports widely adopted data ingestion [protocols](https://docs.greptime.com/user-guide/protocols/overview) for seamless migration and integration. Written in pure Rust for superior performance and reliability. GreptimeDB features a distributed query engine with intelligent indexing to handle high cardinality data efficiently. Its optimized columnar storage achieves 50x cost efficiency on cloud object storage through advanced compression. [Benchmark reports](https://www.greptime.com/blogs/2024-09-09-report-summary).
* **Flexible Deployment Options** * **Cloud-Edge Collaboration**
Deploy GreptimeDB anywhere from ARM-based edge devices to cloud environments with unified APIs and bandwidth-efficient data synchronization. Query edge and cloud data seamlessly through identical APIs. [Learn how to run on Android](https://docs.greptime.com/user-guide/deployments/run-on-android/). GreptimeDB seamlessly operates across cloud and edge (ARM/Android/Linux), providing consistent APIs and control plane for unified data management and efficient synchronization. [Learn how to run on Android](https://docs.greptime.com/user-guide/deployments/run-on-android/).
* **Multi-protocol Ingestion, SQL & PromQL Ready**
Widely adopted database protocols and APIs, including MySQL, PostgreSQL, InfluxDB, OpenTelemetry, Loki and Prometheus, etc. Effortless Adoption & Seamless Migration. [Supported Protocols Overview](https://docs.greptime.com/user-guide/protocols/overview).
For more detailed info please read [Why GreptimeDB](https://docs.greptime.com/user-guide/concepts/why-greptimedb). For more detailed info please read [Why GreptimeDB](https://docs.greptime.com/user-guide/concepts/why-greptimedb).
@@ -233,5 +233,3 @@ Special thanks to all the contributors who have propelled GreptimeDB forward. Fo
- GreptimeDB's query engine is powered by [Apache Arrow DataFusion™](https://arrow.apache.org/datafusion/). - GreptimeDB's query engine is powered by [Apache Arrow DataFusion™](https://arrow.apache.org/datafusion/).
- [Apache OpenDAL™](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer. - [Apache OpenDAL™](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer.
- GreptimeDB's meta service is based on [etcd](https://etcd.io/). - GreptimeDB's meta service is based on [etcd](https://etcd.io/).
<img alt="Known Users" src="https://greptime.com/logo/img/users.png"/>


@@ -96,8 +96,6 @@
| `procedure.max_running_procedures` | Integer | `128` | Max running procedures.<br/>The maximum number of procedures that can be running at the same time.<br/>If the number of running procedures exceeds this limit, the procedure will be rejected. | | `procedure.max_running_procedures` | Integer | `128` | Max running procedures.<br/>The maximum number of procedures that can be running at the same time.<br/>If the number of running procedures exceeds this limit, the procedure will be rejected. |
| `flow` | -- | -- | flow engine options. | | `flow` | -- | -- | flow engine options. |
| `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. | | `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. |
| `query` | -- | -- | The query engine options. |
| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
| `storage` | -- | -- | The data storage options. | | `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `./greptimedb_data/` | The working home directory. | | `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. | | `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
@@ -272,8 +270,6 @@
| `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. | | `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. |
| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. | | `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. |
| `meta_client.metadata_cache_tti` | String | `5m` | -- | | `meta_client.metadata_cache_tti` | String | `5m` | -- |
| `query` | -- | -- | The query engine options. |
| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
| `datanode` | -- | -- | Datanode options. | | `datanode` | -- | -- | Datanode options. |
| `datanode.client` | -- | -- | Datanode client options. | | `datanode.client` | -- | -- | Datanode client options. |
| `datanode.client.connect_timeout` | String | `10s` | -- | | `datanode.client.connect_timeout` | String | `10s` | -- |
@@ -319,7 +315,6 @@
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". | | `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. | | `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). | | `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
| `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.<br/>**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
| `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. | | `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. | | `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
| `runtime` | -- | -- | The runtime options. | | `runtime` | -- | -- | The runtime options. |
@@ -344,9 +339,6 @@
| `wal.provider` | String | `raft_engine` | -- | | `wal.provider` | String | `raft_engine` | -- |
| `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. | | `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. |
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` | | `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
| `wal.auto_prune_interval` | String | `0s` | Interval of automatically WAL pruning.<br/>Set to `0s` to disable automatically WAL pruning which delete unused remote WAL entries periodically. |
| `wal.trigger_flush_threshold` | Integer | `0` | The threshold to trigger a flush operation of a region in automatically WAL pruning.<br/>Metasrv will send a flush request to flush the region when:<br/>`trigger_flush_threshold` + `prunable_entry_id` < `max_prunable_entry_id`<br/>where:<br/>- `prunable_entry_id` is the maximum entry id that can be pruned of the region.<br/>- `max_prunable_entry_id` is the maximum prunable entry id among all regions in the same topic.<br/>Set to `0` to disable the flush operation. |
| `wal.auto_prune_parallelism` | Integer | `10` | Concurrent task limit for automatically WAL pruning. |
| `wal.num_topics` | Integer | `64` | Number of topics. | | `wal.num_topics` | Integer | `64` | Number of topics. |
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) | | `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>Only accepts strings that match the following regular expression pattern:<br/>[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*<br/>i.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1. | | `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>Only accepts strings that match the following regular expression pattern:<br/>[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*<br/>i.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1. |
@@ -437,8 +429,6 @@
| `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. | | `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
| `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. | | `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. | | `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
| `query` | -- | -- | The query engine options. |
| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
| `storage` | -- | -- | The data storage options. | | `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `./greptimedb_data/` | The working home directory. | | `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. | | `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |


@@ -243,12 +243,6 @@ overwrite_entry_start_id = false
# credential = "base64-credential" # credential = "base64-credential"
# endpoint = "https://storage.googleapis.com" # endpoint = "https://storage.googleapis.com"
## The query engine options.
[query]
## Parallelism of the query engine.
## Default to 0, which means the number of CPU cores.
parallelism = 0
## The data storage options. ## The data storage options.
[storage] [storage]
## The working home directory. ## The working home directory.


@@ -179,12 +179,6 @@ metadata_cache_ttl = "10m"
# TTI of the metadata cache. # TTI of the metadata cache.
metadata_cache_tti = "5m" metadata_cache_tti = "5m"
## The query engine options.
[query]
## Parallelism of the query engine.
## Default to 0, which means the number of CPU cores.
parallelism = 0
## Datanode options. ## Datanode options.
[datanode] [datanode]
## Datanode client options. ## Datanode client options.


@@ -50,10 +50,6 @@ use_memory_store = false
## - Using shared storage (e.g., s3). ## - Using shared storage (e.g., s3).
enable_region_failover = false enable_region_failover = false
## Whether to allow region failover on local WAL.
## **This option is not recommended to be set to true, because it may lead to data loss during failover.**
allow_region_failover_on_local_wal = false
## Max allowed idle time before removing node info from metasrv memory. ## Max allowed idle time before removing node info from metasrv memory.
node_max_idle_time = "24hours" node_max_idle_time = "24hours"
@@ -134,22 +130,6 @@ broker_endpoints = ["127.0.0.1:9092"]
## Otherwise, use topics named `topic_name_prefix_[0..num_topics)` ## Otherwise, use topics named `topic_name_prefix_[0..num_topics)`
auto_create_topics = true auto_create_topics = true
## Interval of automatically WAL pruning.
## Set to `0s` to disable automatically WAL pruning which delete unused remote WAL entries periodically.
auto_prune_interval = "0s"
## The threshold to trigger a flush operation of a region in automatically WAL pruning.
## Metasrv will send a flush request to flush the region when:
## `trigger_flush_threshold` + `prunable_entry_id` < `max_prunable_entry_id`
## where:
## - `prunable_entry_id` is the maximum entry id that can be pruned of the region.
## - `max_prunable_entry_id` is the maximum prunable entry id among all regions in the same topic.
## Set to `0` to disable the flush operation.
trigger_flush_threshold = 0
## Concurrent task limit for automatically WAL pruning.
auto_prune_parallelism = 10
## Number of topics. ## Number of topics.
num_topics = 64 num_topics = 64
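
The flush-trigger rule removed in the hunk above (`trigger_flush_threshold` + `prunable_entry_id` < `max_prunable_entry_id`) can be sanity-checked with a few made-up numbers; the sketch below only restates that inequality, and every value in it is illustrative rather than a real default.

```bash
#!/usr/bin/env bash
# Illustrative check of the WAL-pruning flush rule; the numbers are invented.
trigger_flush_threshold=100
prunable_entry_id=250        # highest entry id this region could prune
max_prunable_entry_id=400    # highest prunable entry id among regions in the same topic

if (( trigger_flush_threshold + prunable_entry_id < max_prunable_entry_id )); then
  # 100 + 250 = 350 < 400, so metasrv would ask this region to flush
  echo "flush request would be sent"
else
  echo "no flush needed"
fi
```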


@@ -334,12 +334,6 @@ max_running_procedures = 128
# credential = "base64-credential" # credential = "base64-credential"
# endpoint = "https://storage.googleapis.com" # endpoint = "https://storage.googleapis.com"
## The query engine options.
[query]
## Parallelism of the query engine.
## Default to 0, which means the number of CPU cores.
parallelism = 0
## The data storage options. ## The data storage options.
[storage] [storage]
## The working home directory. ## The working home directory.


@@ -1,6 +1,6 @@
# Profile memory usage of GreptimeDB # Profile memory usage of GreptimeDB
This crate provides an easy approach to dump memory profiling info. A set of ready to use scripts is provided in [docs/how-to/memory-profile-scripts](docs/how-to/memory-profile-scripts). This crate provides an easy approach to dump memory profiling info.
## Prerequisites ## Prerequisites
### jemalloc ### jemalloc


@@ -1,52 +0,0 @@
# Memory Analysis Process
This section will guide you through the process of analyzing memory usage for greptimedb.
1. Get the `jeprof` tool script, see the next section("Getting the `jeprof` tool") for details.
2. After starting `greptimedb`(with env var `MALLOC_CONF=prof:true`), execute the `dump.sh` script with the PID of the `greptimedb` process as an argument. This continuously monitors memory usage and captures profiles when exceeding thresholds (e.g. +20MB within 10 minutes). Outputs `greptime-{timestamp}.gprof` files.
3. With 2-3 gprof files, run `gen_flamegraph.sh` in the same environment to generate flame graphs showing memory allocation call stacks.
4. **NOTE:** The `gen_flamegraph.sh` script requires `jeprof` and optionally `flamegraph.pl` to be in the current directory. If needed to gen flamegraph now, run the `get_flamegraph_tool.sh` script, which downloads the flame graph generation tool `flamegraph.pl` to the current directory.
The usage of `gen_flamegraph.sh` is:
`Usage: ./gen_flamegraph.sh <binary_path> <gprof_directory>`
where `<binary_path>` is the path to the greptimedb binary, `<gprof_directory>` is the directory containing the gprof files(the directory `dump.sh` is dumping profiles to).
Example call: `./gen_flamegraph.sh ./greptime .`
Generating the flame graph might take a few minutes. The generated flame graphs are located in the `<gprof_directory>/flamegraphs` directory. Or if no `flamegraph.pl` is found, it will only contain `.collapse` files which is also fine.
5. You can send the generated flame graphs(the entire folder of `<gprof_directory>/flamegraphs`) to developers for further analysis.
## Getting the `jeprof` tool
there are three ways to get `jeprof`, list in here from simple to complex, using any one of those methods is ok, as long as it's the same environment as the `greptimedb` will be running on:
1. If you are compiling greptimedb from source, then `jeprof` is already produced during compilation. After running `cargo build`, execute `find_compiled_jeprof.sh`. This will copy `jeprof` to the current directory.
2. Or, if you have the Rust toolchain installed locally, simply follow these commands:
```bash
cargo new get_jeprof
cd get_jeprof
```
Then add this line to `Cargo.toml`:
```toml
[dependencies]
tikv-jemalloc-ctl = { version = "0.6", features = ["use_std", "stats"] }
```
then run:
```bash
cargo build
```
after that the `jeprof` tool is produced. Now run `find_compiled_jeprof.sh` in current directory, it will copy the `jeprof` tool to the current directory.
3. compile jemalloc from source
you can first clone this repo, and checkout to this commit:
```bash
git clone https://github.com/tikv/jemalloc.git
cd jemalloc
git checkout e13ca993e8ccb9ba9847cc330696e02839f328f7
```
then run:
```bash
./configure
make
```
and `jeprof` is in `.bin/` directory. Copy it to the current directory.
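
Tying the removed steps above together, one end-to-end run could look roughly like the sketch below; the binary path, the `standalone start` invocation, and the working directory are illustrative assumptions rather than part of the original instructions.

```bash
# Sketch of the profiling workflow described above (paths and commands are illustrative).
# jeprof is assumed to already be in the current directory, as described above.

# 1. Start greptimedb with jemalloc profiling enabled.
MALLOC_CONF=prof:true ./greptime standalone start &
GREPTIME_PID=$!

# 2. Monitor the process; dump.sh writes greptime-{timestamp}.gprof files
#    whenever memory grows past its threshold.
./dump.sh "$GREPTIME_PID" &

# 3. Once a few profiles exist, fetch flamegraph.pl and build the flame graphs.
./get_flamegraph_tool.sh
./gen_flamegraph.sh ./greptime .
# Outputs land in ./flamegraphs (or only .collapse files if flamegraph.pl is absent).
```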


@@ -1,78 +0,0 @@
#!/bin/bash
# Monitors greptime process memory usage every 10 minutes
# Triggers memory profile capture via `curl -X POST localhost:4000/debug/prof/mem > greptime-{timestamp}.gprof`
# when memory increases by more than 20MB since last check
# Generated profiles can be analyzed using flame graphs as described in `how-to-profile-memory.md`
# (jeprof is compiled with the database - see documentation)
# Alternative: Share binaries + profiles for analysis (Docker images preferred)
# Threshold in Kilobytes (20 MB)
threshold_kb=$((20 * 1024))
sleep_interval=$((10 * 60))
# Variable to store the last measured memory usage in KB
last_mem_kb=0
echo "Starting memory monitoring for 'greptime' process..."
while true; do
# Check if PID is provided as an argument
if [ -z "$1" ]; then
echo "$(date): PID must be provided as a command-line argument."
exit 1
fi
pid="$1"
# Validate that the PID is a number
if ! [[ "$pid" =~ ^[0-9]+$ ]]; then
echo "$(date): Invalid PID: '$pid'. PID must be a number."
exit 1
fi
# Get the current Resident Set Size (RSS) in Kilobytes
current_mem_kb=$(ps -o rss= -p "$pid")
# Check if ps command was successful and returned a number
if ! [[ "$current_mem_kb" =~ ^[0-9]+$ ]]; then
echo "$(date): Failed to get memory usage for PID $pid. Skipping check."
# Keep last_mem_kb to avoid false positives if the process briefly becomes unreadable.
continue
fi
echo "$(date): Current memory usage for PID $pid: ${current_mem_kb} KB"
# Compare with the last measurement
# if it's the first run, also do a baseline dump just to make sure we can dump
diff_kb=$((current_mem_kb - last_mem_kb))
echo "$(date): Memory usage change since last check: ${diff_kb} KB"
if [ "$diff_kb" -gt "$threshold_kb" ]; then
echo "$(date): Memory increase (${diff_kb} KB) exceeded threshold (${threshold_kb} KB). Dumping profile..."
timestamp=$(date +%Y%m%d%H%M%S)
profile_file="greptime-${timestamp}.gprof"
# Execute curl and capture output to file
if curl -sf -X POST localhost:4000/debug/prof/mem > "$profile_file"; then
echo "$(date): Memory profile saved to $profile_file"
else
echo "$(date): Failed to dump memory profile (curl exit code: $?)."
# Remove the potentially empty/failed profile file
rm -f "$profile_file"
fi
else
echo "$(date): Memory increase (${diff_kb} KB) is within the threshold (${threshold_kb} KB)."
fi
# Update the last memory usage
last_mem_kb=$current_mem_kb
# Wait for 5 minutes
echo "$(date): Sleeping for $sleep_interval seconds..."
sleep $sleep_interval
done
echo "Memory monitoring script stopped." # This line might not be reached in normal operation


@@ -1,15 +0,0 @@
#!/bin/bash
# Locates compiled jeprof binary (memory analysis tool) after cargo build
# Copies it to current directory from target/ build directories
JPROF_PATH=$(find . -name 'jeprof' -print -quit)
if [ -n "$JPROF_PATH" ]; then
echo "Found jeprof at $JPROF_PATH"
cp "$JPROF_PATH" .
chmod +x jeprof
echo "Copied jeprof to current directory and made it executable."
else
echo "jeprof not found"
exit 1
fi


@@ -1,89 +0,0 @@
#!/bin/bash
# Generate flame graphs from a series of `.gprof` files
# First argument: Path to the binary executable
# Second argument: Path to directory containing gprof files
# Requires `jeprof` and `flamegraph.pl` in current directory
# What this script essentially does is:
# ./jeprof <binary> <gprof> --collapse | ./flamegraph.pl > <output>
# For differential analysis between consecutive profiles:
# ./jeprof <binary> --base <gprof1> <gprof2> --collapse | ./flamegraph.pl > <output_diff>
set -e # Exit immediately if a command exits with a non-zero status.
# Check for required tools
if [ ! -f "./jeprof" ]; then
echo "Error: jeprof not found in the current directory."
exit 1
fi
if [ ! -f "./flamegraph.pl" ]; then
echo "Error: flamegraph.pl not found in the current directory."
exit 1
fi
# Check arguments
if [ "$#" -ne 2 ]; then
echo "Usage: $0 <binary_path> <gprof_directory>"
exit 1
fi
BINARY_PATH=$1
GPROF_DIR=$2
OUTPUT_DIR="${GPROF_DIR}/flamegraphs" # Store outputs in a subdirectory
if [ ! -f "$BINARY_PATH" ]; then
echo "Error: Binary file not found at $BINARY_PATH"
exit 1
fi
if [ ! -d "$GPROF_DIR" ]; then
echo "Error: gprof directory not found at $GPROF_DIR"
exit 1
fi
mkdir -p "$OUTPUT_DIR"
echo "Generating flamegraphs in $OUTPUT_DIR"
# Find and sort gprof files
# Use find + sort -V for natural sort of version numbers if present in filenames
# Use null-terminated strings for safety with find/xargs/sort
mapfile -d $'\0' gprof_files < <(find "$GPROF_DIR" -maxdepth 1 -name '*.gprof' -print0 | sort -zV)
if [ ${#gprof_files[@]} -eq 0 ]; then
echo "No .gprof files found in $GPROF_DIR"
exit 0
fi
prev_gprof=""
# Generate flamegraphs
for gprof_file in "${gprof_files[@]}"; do
# Skip empty entries if any
if [ -z "$gprof_file" ]; then
continue
fi
filename=$(basename "$gprof_file" .gprof)
output_collapse="${OUTPUT_DIR}/${filename}.collapse"
output_svg="${OUTPUT_DIR}/${filename}.svg"
echo "Generating collapse file for $gprof_file -> $output_collapse"
./jeprof "$BINARY_PATH" "$gprof_file" --collapse > "$output_collapse"
echo "Generating flamegraph for $gprof_file -> $output_svg"
./flamegraph.pl "$output_collapse" > "$output_svg" || true
# Generate diff flamegraph if not the first file
if [ -n "$prev_gprof" ]; then
prev_filename=$(basename "$prev_gprof" .gprof)
diff_output_collapse="${OUTPUT_DIR}/${prev_filename}_vs_${filename}_diff.collapse"
diff_output_svg="${OUTPUT_DIR}/${prev_filename}_vs_${filename}_diff.svg"
echo "Generating diff collapse file for $prev_gprof vs $gprof_file -> $diff_output_collapse"
./jeprof "$BINARY_PATH" --base "$prev_gprof" "$gprof_file" --collapse > "$diff_output_collapse"
echo "Generating diff flamegraph for $prev_gprof vs $gprof_file -> $diff_output_svg"
./flamegraph.pl "$diff_output_collapse" > "$diff_output_svg" || true
fi
prev_gprof="$gprof_file"
done
echo "Flamegraph generation complete."


@@ -1,44 +0,0 @@
#!/bin/bash
# Generate flame graphs from .collapse files
# Argument: Path to directory containing collapse files
# Requires `flamegraph.pl` in current directory
# Check if flamegraph.pl exists
if [ ! -f "./flamegraph.pl" ]; then
echo "Error: flamegraph.pl not found in the current directory."
exit 1
fi
# Check if directory argument is provided
if [ -z "$1" ]; then
echo "Usage: $0 <collapse_directory>"
exit 1
fi
COLLAPSE_DIR=$1
# Check if the provided argument is a directory
if [ ! -d "$COLLAPSE_DIR" ]; then
echo "Error: '$COLLAPSE_DIR' is not a valid directory."
exit 1
fi
echo "Generating flame graphs from collapse files in '$COLLAPSE_DIR'..."
# Find and process each .collapse file
find "$COLLAPSE_DIR" -maxdepth 1 -name "*.collapse" -print0 | while IFS= read -r -d $'\0' collapse_file; do
if [ -f "$collapse_file" ]; then
# Construct the output SVG filename
svg_file="${collapse_file%.collapse}.svg"
echo "Generating $svg_file from $collapse_file..."
./flamegraph.pl "$collapse_file" > "$svg_file"
if [ $? -ne 0 ]; then
echo "Error generating flame graph for $collapse_file"
else
echo "Successfully generated $svg_file"
fi
fi
done
echo "Flame graph generation complete."


@@ -1,6 +0,0 @@
#!/bin/bash
# Download flamegraph.pl to current directory - this is the flame graph generation tool script
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
chmod +x ./flamegraph.pl


@@ -1,89 +1,61 @@
# Grafana dashboards for GreptimeDB Grafana dashboard for GreptimeDB
--------------------------------
## Overview GreptimeDB's official Grafana dashboard.
This repository maintains the Grafana dashboards for GreptimeDB. It has two types of dashboards: Status notify: we are still working on this config. It's expected to change frequently in the recent days. Please feel free to submit your feedback and/or contribution to this dashboard 🤗
- `cluster/dashboard.json`: The Grafana dashboard for the GreptimeDB cluster. Read the [dashboard.md](./dashboards/cluster/dashboard.md) for more details. If you use Helm [chart](https://github.com/GreptimeTeam/helm-charts) to deploy GreptimeDB cluster, you can enable self-monitoring by setting the following values in your Helm chart:
- `standalone/dashboard.json`: The Grafana dashboard for the standalone GreptimeDB instance. **It's generated from the `cluster/dashboard.json` by removing the instance filter through the `make dashboards` command**. Read the [dashboard.md](./dashboards/standalone/dashboard.md) for more details.
As the rapid development of GreptimeDB, the metrics may be changed, and please feel free to submit your feedback and/or contribution to this dashboard 🤗
**NOTE**:
- The Grafana version should be greater than 9.0.
- If you want to modify the dashboards, you only need to modify the `cluster/dashboard.json` and run the `make dashboards` command to generate the `standalone/dashboard.json` and other related files.
To maintain the dashboards easily, we use the [`dac`](https://github.com/zyy17/dac) tool to generate the intermediate dashboards and markdown documents:
- `cluster/dashboard.yaml`: The intermediate dashboard for the GreptimeDB cluster.
- `standalone/dashboard.yaml`: The intermediate dashboard for the standalone GreptimeDB instance.
## Data Sources
There are two data sources for the dashboards to fetch the metrics:
- **Prometheus**: Expose the metrics of GreptimeDB.
- **Information Schema**: It is the MySQL port of the current monitored instance. The `overview` dashboard will use this datasource to show the information schema of the current instance.
## Instance Filters
To deploy the dashboards for multiple scenarios (K8s, bare metal, etc.), we prefer to use the `instance` label when filtering instances.
Additionally, we recommend including the `pod` label in the legend to make it easier to identify each instance, even though this field will be empty in bare metal scenarios.
For example, the following query is recommended:
```promql
sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)
```
And the legend will be like: `[{{instance}}]-[{{ pod }}]`.
## Deployment
### Helm
If you use the Helm [chart](https://github.com/GreptimeTeam/helm-charts) to deploy a GreptimeDB cluster, you can enable self-monitoring by setting the following values in your Helm chart:
- `monitoring.enabled=true`: Deploys a standalone GreptimeDB instance dedicated to monitoring the cluster; - `monitoring.enabled=true`: Deploys a standalone GreptimeDB instance dedicated to monitoring the cluster;
- `grafana.enabled=true`: Deploys Grafana and automatically imports the monitoring dashboard; - `grafana.enabled=true`: Deploys Grafana and automatically imports the monitoring dashboard;
The standalone GreptimeDB instance will collect metrics from your cluster, and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/nightly/user-guide/deployments/deploy-on-kubernetes/getting-started). The standalone GreptimeDB instance will collect metrics from your cluster and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/nightly/user-guide/deployments/deploy-on-kubernetes/getting-started).
### Self-host Prometheus and import dashboards manually # How to use
1. **Configure Prometheus to scrape the cluster** ## `greptimedb.json`
The following is an example configuration(**Please modify it according to your actual situation**): Open Grafana Dashboard page, choose `New` -> `Import`. And upload `greptimedb.json` file.
```yml ## `greptimedb-cluster.json`
# example config
# only to indicate how to assign labels to each target
# modify yours accordingly
scrape_configs:
- job_name: metasrv
static_configs:
- targets: ['<metasrv-ip>:<port>']
- job_name: datanode This cluster dashboard provides a comprehensive view of incoming requests, response statuses, and internal activities such as flush and compaction, with a layered structure from frontend to datanode. Designed with a focus on alert functionality, its primary aim is to highlight any anomalies in metrics, allowing users to quickly pinpoint the cause of errors.
static_configs:
- targets: ['<datanode0-ip>:<port>', '<datanode1-ip>:<port>', '<datanode2-ip>:<port>']
- job_name: frontend We use Prometheus to scrape off metrics from nodes in GreptimeDB cluster, Grafana to visualize the diagram. Any compatible stack should work too.
static_configs:
- targets: ['<frontend-ip>:<port>']
```
2. **Configure the data sources in Grafana** __Note__: This dashboard is still in an early stage of development. Any issue or advice on improvement is welcomed.
You need to add two data sources in Grafana: ### Configuration
- Prometheus: It is the Prometheus instance that scrapes the GreptimeDB metrics. Please ensure the following configuration before importing the dashboard into Grafana.
- Information Schema: It is the MySQL port of the current monitored instance. The dashboard will use this datasource to show the information schema of the current instance.
3. **Import the dashboards based on your deployment scenario** __1. Prometheus scrape config__
- **Cluster**: Import the `cluster/dashboard.json` dashboard. Configure Prometheus to scrape the cluster.
- **Standalone**: Import the `standalone/dashboard.json` dashboard.
```yml
# example config
# only to indicate how to assign labels to each target
# modify yours accordingly
scrape_configs:
- job_name: metasrv
static_configs:
- targets: ['<metasrv-ip>:<port>']
- job_name: datanode
static_configs:
- targets: ['<datanode0-ip>:<port>', '<datanode1-ip>:<port>', '<datanode2-ip>:<port>']
- job_name: frontend
static_configs:
- targets: ['<frontend-ip>:<port>']
```
__2. Grafana config__
Create a Prometheus data source in Grafana before using this dashboard. We use `datasource` as a variable in Grafana dashboard so that multiple environments are supported.
### Usage
Use `datasource` or `instance` on the upper-left corner to filter data from certain node.
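
The Helm-based self-monitoring setup mentioned in the README diff above (`monitoring.enabled=true` plus `grafana.enabled=true`) could be applied with something like the following; the repository URL, chart name, release name, and namespace are assumptions, not values taken from this change.

```bash
# Hypothetical example of enabling cluster self-monitoring via the Helm chart.
# Repo URL, chart name, release name, and namespace below are assumptions.
helm repo add greptime https://greptimeteam.github.io/helm-charts/
helm repo update
helm upgrade --install mycluster greptime/greptimedb-cluster \
  --namespace greptimedb --create-namespace \
  --set monitoring.enabled=true \
  --set grafana.enabled=true
```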

grafana/check.sh (new executable file, 19 lines added)

@@ -0,0 +1,19 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
# Use jq to check for panels with empty or missing descriptions
invalid_panels=$(cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels[]
| select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))
')
# Check if any invalid panels were found
if [[ -n "$invalid_panels" ]]; then
echo "Error: The following panels have empty or missing descriptions:"
echo "$invalid_panels"
exit 1
else
echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
exit 0
fi

File diff suppressed because it is too large.


@@ -1,97 +0,0 @@
# Overview
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Uptime | `time() - process_start_time_seconds` | `stat` | The start time of GreptimeDB. | `prometheus` | `s` | `__auto` |
| Version | `SELECT pkg_version FROM information_schema.build_info` | `stat` | GreptimeDB version. | `mysql` | -- | -- |
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))` | `stat` | Total ingestion rate. | `prometheus` | `rowsps` | `__auto` |
| Total Storage Size | `select SUM(disk_size) from information_schema.region_statistics;` | `stat` | Total number of data file size. | `mysql` | `decbytes` | -- |
| Total Rows | `select SUM(region_rows) from information_schema.region_statistics;` | `stat` | Total number of data rows in the cluster. Calculated by sum of rows from each region. | `mysql` | `sishort` | -- |
| Deployment | `SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';`<br/>`SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';`<br/>`SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';`<br/>`SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';` | `stat` | The deployment topology of GreptimeDB. | `mysql` | -- | -- |
| Database Resources | `SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')`<br/>`SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'`<br/>`SELECT COUNT(region_id) as regions FROM information_schema.region_peers`<br/>`SELECT COUNT(*) as flows FROM information_schema.flows` | `stat` | The number of the key resources in GreptimeDB. | `mysql` | -- | -- |
| Data Size | `SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;`<br/>`SELECT SUM(index_size) as index FROM information_schema.region_statistics;`<br/>`SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;` | `stat` | The data size of wal/index/manifest in the GreptimeDB. | `mysql` | `decbytes` | -- |
# Ingestion
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `ingestion` |
| Ingestion Rate by Type | `sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))`<br/>`sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `http-logs` |
# Queries
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Total Query Rate | `sum (rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_http_promql_elapsed_counte{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Total rate of query API calls by protocol. This metric is collected from frontends.<br/><br/>Here we listed 3 main protocols:<br/>- MySQL<br/>- Postgres<br/>- Prometheus API<br/><br/>Note that some other minor query APIs, such as /sql, are not included | `prometheus` | `reqps` | `mysql` |
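
The PromQL expressions in these tables use Grafana variables such as `$__rate_interval` and `$frontend`; to test one outside Grafana, substitute a literal range and selector. A minimal sketch, assuming a Prometheus server scraping GreptimeDB is reachable at `http://localhost:9090` (a hypothetical address):

```bash
# Query the total MySQL query rate directly from the Prometheus HTTP API.
curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=sum(rate(greptime_servers_mysql_query_elapsed_count[5m]))' \
  | jq '.data.result'
```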
# Resources
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Datanode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{instance}}]-[{{ pod }}]` |
| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
| Frontend Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
| Flownode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
# Frontend Requests
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| HTTP QPS per Instance | `sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{instance=~"$frontend",path!~"/health\|/metrics"}[$__rate_interval]))` | `timeseries` | HTTP QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]` |
| HTTP P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{instance=~"$frontend",path!~"/health\|/metrics"}[$__rate_interval])))` | `timeseries` | HTTP P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
| gRPC QPS per Instance | `sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | gRPC QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]` |
| gRPC P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | gRPC P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
| MySQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | MySQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
| MySQL P99 per Instance | `histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | MySQL P99 per Instance. | `prometheus` | `s` | `[{{ instance }}]-[{{ pod }}]-p99` |
| PostgreSQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | PostgreSQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
| PostgreSQL P99 per Instance | `histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | PostgreSQL P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
# Frontend to Datanode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Ingest Rows per Instance | `sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Ingestion rate in rows as seen on each frontend | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
| Region Call QPS per Instance | `sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Region Call QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
| Region Call P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | Region Call P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
# Mito Engine
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Request OPS per Instance | `sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Request QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Write Buffer per Instance | `greptime_mito_write_buffer_bytes{instance=~"$datanode"}` | `timeseries` | Write Buffer per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]` |
| Write Rows per Instance | `sum by (instance, pod) (rate(greptime_mito_write_rows_total{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Ingestion size by row counts. | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
| Flush OPS per Instance | `sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Flush QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{reason}}]` |
| Write Stall per Instance | `sum by(instance, pod) (greptime_mito_write_stall_total{instance=~"$datanode"})` | `timeseries` | Write Stall per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
| Read Stage OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{instance=~"$datanode", stage="total"}[$__rate_interval]))` | `timeseries` | Read Stage OPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]` |
| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
| Cached Bytes per Instance | `greptime_mito_cache_bytes{instance=~"$datanode"}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Inflight Compaction | `greptime_mito_inflight_compaction_count` | `timeseries` | Ongoing compaction task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
| WAL sync duration seconds | `histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))` | `timeseries` | Raft engine (local disk) log store sync latency, p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
| Log Store op duration seconds | `histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))` | `timeseries` | Write-ahead log operations latency at p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99` |
| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
# OpenDAL
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode",operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Total traffic in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode", error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
# Metasrv
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `state-timeline` | Counter of region migration by source and destination | `prometheus` | `none` | `from-datanode-{{datanode_id}}` |
| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `prometheus` | `none` | `__auto` |
| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `__auto` |
# Flownode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | `prometheus` | -- | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-p95` |
| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |


@@ -1,769 +0,0 @@
groups:
- title: Overview
panels:
- title: Uptime
type: stat
description: The start time of GreptimeDB.
unit: s
queries:
- expr: time() - process_start_time_seconds
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Version
type: stat
description: GreptimeDB version.
queries:
- expr: SELECT pkg_version FROM information_schema.build_info
datasource:
type: mysql
uid: ${information_schema}
- title: Total Ingestion Rate
type: stat
description: Total ingestion rate.
unit: rowsps
queries:
- expr: sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Total Storage Size
type: stat
description: Total size of data files.
unit: decbytes
queries:
- expr: select SUM(disk_size) from information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Total Rows
type: stat
description: Total number of data rows in the cluster. Calculated by sum of rows from each region.
unit: sishort
queries:
- expr: select SUM(region_rows) from information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Deployment
type: stat
description: The deployment topology of GreptimeDB.
queries:
- expr: SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';
datasource:
type: mysql
uid: ${information_schema}
- title: Database Resources
type: stat
description: The number of the key resources in GreptimeDB.
queries:
- expr: SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(region_id) as regions FROM information_schema.region_peers
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(*) as flows FROM information_schema.flows
datasource:
type: mysql
uid: ${information_schema}
- title: Data Size
type: stat
description: The data size of wal/index/manifest in the GreptimeDB.
unit: decbytes
queries:
- expr: SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT SUM(index_size) as index FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Ingestion
panels:
- title: Total Ingestion Rate
type: timeseries
description: |
Total ingestion rate.
Here we listed 3 primary protocols:
- Prometheus remote write
- Greptime's gRPC API (when using our ingest SDK)
- Log ingestion http API
unit: rowsps
queries:
- expr: sum(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: ingestion
- title: Ingestion Rate by Type
type: timeseries
description: |
Total ingestion rate.
Here we listed 3 primary protocols:
- Prometheus remote write
- Greptime's gRPC API (when using our ingest SDK)
- Log ingestion http API
unit: rowsps
queries:
- expr: sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: http-logs
- expr: sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: prometheus-remote-write
- title: Queries
panels:
- title: Total Query Rate
type: timeseries
description: |-
Total rate of query API calls by protocol. This metric is collected from frontends.
Here we listed 3 main protocols:
- MySQL
- Postgres
- Prometheus API
Note that some other minor query APIs, such as /sql, are not included
unit: reqps
queries:
- expr: sum (rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: mysql
- expr: sum (rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: pg
- expr: sum (rate(greptime_servers_http_promql_elapsed_counte{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: promql
- title: Resources
panels:
- title: Datanode Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{ pod }}]'
- title: Datanode CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
- title: Metasrv Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
- title: Metasrv CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Flownode Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Flownode CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend Requests
panels:
- title: HTTP QPS per Instance
type: timeseries
description: HTTP QPS per Instance.
unit: reqps
queries:
- expr: sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{instance=~"$frontend",path!~"/health|/metrics"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]'
- title: HTTP P99 per Instance
type: timeseries
description: HTTP P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{instance=~"$frontend",path!~"/health|/metrics"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
- title: gRPC QPS per Instance
type: timeseries
description: gRPC QPS per Instance.
unit: reqps
queries:
- expr: sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]'
- title: gRPC P99 per Instance
type: timeseries
description: gRPC P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
- title: MySQL QPS per Instance
type: timeseries
description: MySQL QPS per Instance.
unit: reqps
queries:
- expr: sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: MySQL P99 per Instance
type: timeseries
description: MySQL P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-p99'
- title: PostgreSQL QPS per Instance
type: timeseries
description: PostgreSQL QPS per Instance.
unit: reqps
queries:
- expr: sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: PostgreSQL P99 per Instance
type: timeseries
description: PostgreSQL P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Frontend to Datanode
panels:
- title: Ingest Rows per Instance
type: timeseries
description: Ingestion rate in rows as seen on each frontend
unit: rowsps
queries:
- expr: sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Region Call QPS per Instance
type: timeseries
description: Region Call QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
- title: Region Call P99 per Instance
type: timeseries
description: Region Call P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{instance=~"$frontend"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
- title: Mito Engine
panels:
- title: Request OPS per Instance
type: timeseries
description: Request QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Request P99 per Instance
type: timeseries
description: Request P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Write Buffer per Instance
type: timeseries
description: Write Buffer per Instance.
unit: decbytes
queries:
- expr: greptime_mito_write_buffer_bytes{instance=~"$datanode"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Write Rows per Instance
type: timeseries
description: Ingestion size by row counts.
unit: rowsps
queries:
- expr: sum by (instance, pod) (rate(greptime_mito_write_rows_total{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Flush OPS per Instance
type: timeseries
description: Flush QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{reason}}]'
- title: Write Stall per Instance
type: timeseries
description: Write Stall per Instance.
queries:
- expr: sum by(instance, pod) (greptime_mito_write_stall_total{instance=~"$datanode"})
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Read Stage OPS per Instance
type: timeseries
description: Read Stage OPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{instance=~"$datanode", stage="total"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Read Stage P99 per Instance
type: timeseries
description: Read Stage P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
- title: Write Stage P99 per Instance
type: timeseries
description: Write Stage P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
- title: Compaction OPS per Instance
type: timeseries
description: Compaction OPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{pod}}]'
- title: Compaction P99 per Instance by Stage
type: timeseries
description: Compaction latency by stage
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
- title: Compaction P99 per Instance
type: timeseries
description: Compaction P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction'
- title: WAL write size
type: timeseries
description: Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate.
unit: bytes
queries:
- expr: histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p95'
- expr: histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p99'
- expr: sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-throughput'
- title: Cached Bytes per Instance
type: timeseries
description: Cached Bytes per Instance.
unit: decbytes
queries:
- expr: greptime_mito_cache_bytes{instance=~"$datanode"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Inflight Compaction
type: timeseries
description: Ongoing compaction task count
unit: none
queries:
- expr: greptime_mito_inflight_compaction_count
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: WAL sync duration seconds
type: timeseries
description: Raft engine (local disk) log store sync latency, p99
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Log Store op duration seconds
type: timeseries
description: Write-ahead log operations latency at p99
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99'
- title: Inflight Flush
type: timeseries
description: Ongoing flush task count
unit: none
queries:
- expr: greptime_mito_inflight_flush_count
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: OpenDAL
panels:
- title: QPS per Instance
type: timeseries
description: QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Read QPS per Instance
type: timeseries
description: Read QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: Read P99 per Instance
type: timeseries
description: Read P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
- title: Write QPS per Instance
type: timeseries
description: Write QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
- title: Write P99 per Instance
type: timeseries
description: Write P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: List QPS per Instance
type: timeseries
description: List QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="list"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: List P99 per Instance
type: timeseries
description: List P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="list"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: Other Requests per Instance
type: timeseries
description: Other Requests per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode",operation!~"read|write|list|stat"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Other Request P99 per Instance
type: timeseries
description: Other Request P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read|write|list"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Opendal traffic
type: timeseries
description: Total traffic in bytes by instance and operation
unit: decbytes
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: OpenDAL errors per Instance
type: timeseries
description: OpenDAL error counts per Instance.
queries:
- expr: sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode", error!="NotFound"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
- title: Metasrv
panels:
- title: Region migration datanode
type: state-timeline
description: Counter of region migration by source and destination
unit: none
queries:
- expr: greptime_meta_region_migration_stat{datanode_type="src"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: from-datanode-{{datanode_id}}
- expr: greptime_meta_region_migration_stat{datanode_type="desc"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: to-datanode-{{datanode_id}}
- title: Region migration error
type: timeseries
description: Counter of region migration error
unit: none
queries:
- expr: greptime_meta_region_migration_error
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Datanode load
type: timeseries
description: Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads.
unit: none
queries:
- expr: greptime_datanode_load
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Flownode
panels:
- title: Flow Ingest / Output Rate
type: timeseries
description: Flow Ingest / Output Rate.
queries:
- expr: sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{pod}}]-[{{instance}}]-[{{direction}}]'
- title: Flow Ingest Latency
type: timeseries
description: Flow Ingest Latency.
queries:
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p95'
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Flow Operation Latency
type: timeseries
description: Flow Operation Latency.
queries:
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p95'
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p99'
- title: Flow Buffer Size per Instance
type: timeseries
description: Flow Buffer Size per Instance.
queries:
- expr: greptime_flow_input_buf_size
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Flow Processing Error per Instance
type: timeseries
description: Flow Processing Error per Instance.
queries:
- expr: sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'
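
The description rule enforced by the check script can also be applied directly to this intermediate YAML. A minimal sketch, assuming mikefarah's `yq` (v4) and `jq` are installed and the definition above is saved as `dashboard.yaml` (a hypothetical path):

```bash
# List any stat/timeseries panel in the YAML definition that lacks a description.
yq -o=json '.' dashboard.yaml | jq -r '
  .groups[].panels[]
  | select((.type == "stat" or .type == "timeseries")
           and ((.description // "") == ""))
  | .title
'
```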

File diff suppressed because it is too large


@@ -1,97 +0,0 @@
# Overview
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Uptime | `time() - process_start_time_seconds` | `stat` | The start time of GreptimeDB. | `prometheus` | `s` | `__auto` |
| Version | `SELECT pkg_version FROM information_schema.build_info` | `stat` | GreptimeDB version. | `mysql` | -- | -- |
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))` | `stat` | Total ingestion rate. | `prometheus` | `rowsps` | `__auto` |
| Total Storage Size | `select SUM(disk_size) from information_schema.region_statistics;` | `stat` | Total size of data files. | `mysql` | `decbytes` | -- |
| Total Rows | `select SUM(region_rows) from information_schema.region_statistics;` | `stat` | Total number of data rows in the cluster. Calculated by sum of rows from each region. | `mysql` | `sishort` | -- |
| Deployment | `SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';`<br/>`SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';`<br/>`SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';`<br/>`SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';` | `stat` | The deployment topology of GreptimeDB. | `mysql` | -- | -- |
| Database Resources | `SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')`<br/>`SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'`<br/>`SELECT COUNT(region_id) as regions FROM information_schema.region_peers`<br/>`SELECT COUNT(*) as flows FROM information_schema.flows` | `stat` | The number of the key resources in GreptimeDB. | `mysql` | -- | -- |
| Data Size | `SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;`<br/>`SELECT SUM(index_size) as index FROM information_schema.region_statistics;`<br/>`SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;` | `stat` | The data size of wal/index/manifest in the GreptimeDB. | `mysql` | `decbytes` | -- |
# Ingestion
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `ingestion` |
| Ingestion Rate by Type | `sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))`<br/>`sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `http-logs` |
# Queries
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Total Query Rate | `sum (rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_http_promql_elapsed_counte{}[$__rate_interval]))` | `timeseries` | Total rate of query API calls by protocol. This metric is collected from frontends.<br/><br/>Here we listed 3 main protocols:<br/>- MySQL<br/>- Postgres<br/>- Prometheus API<br/><br/>Note that some other minor query APIs, such as /sql, are not included | `prometheus` | `reqps` | `mysql` |
# Resources
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Datanode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{instance}}]-[{{ pod }}]` |
| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
| Frontend Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
| Flownode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
# Frontend Requests
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| HTTP QPS per Instance | `sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{path!~"/health\|/metrics"}[$__rate_interval]))` | `timeseries` | HTTP QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]` |
| HTTP P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{path!~"/health\|/metrics"}[$__rate_interval])))` | `timeseries` | HTTP P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
| gRPC QPS per Instance | `sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{}[$__rate_interval]))` | `timeseries` | gRPC QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]` |
| gRPC P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | gRPC P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
| MySQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))` | `timeseries` | MySQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
| MySQL P99 per Instance | `histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | MySQL P99 per Instance. | `prometheus` | `s` | `[{{ instance }}]-[{{ pod }}]-p99` |
| PostgreSQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))` | `timeseries` | PostgreSQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
| PostgreSQL P99 per Instance | `histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | PostgreSQL P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
# Frontend to Datanode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Ingest Rows per Instance | `sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))` | `timeseries` | Ingestion rate in rows as seen on each frontend | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
| Region Call QPS per Instance | `sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{}[$__rate_interval]))` | `timeseries` | Region Call QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
| Region Call P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{}[$__rate_interval])))` | `timeseries` | Region Call P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
# Mito Engine
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Request OPS per Instance | `sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{}[$__rate_interval]))` | `timeseries` | Request QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Write Buffer per Instance | `greptime_mito_write_buffer_bytes{}` | `timeseries` | Write Buffer per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]` |
| Write Rows per Instance | `sum by (instance, pod) (rate(greptime_mito_write_rows_total{}[$__rate_interval]))` | `timeseries` | Ingestion size by row counts. | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
| Flush OPS per Instance | `sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{}[$__rate_interval]))` | `timeseries` | Flush QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{reason}}]` |
| Write Stall per Instance | `sum by(instance, pod) (greptime_mito_write_stall_total{})` | `timeseries` | Write Stall per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
| Read Stage OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{ stage="total"}[$__rate_interval]))` | `timeseries` | Read Stage OPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]` |
| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
| Cached Bytes per Instance | `greptime_mito_cache_bytes{}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Inflight Compaction | `greptime_mito_inflight_compaction_count` | `timeseries` | Ongoing compaction task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
| WAL sync duration seconds | `histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))` | `timeseries` | Raft engine (local disk) log store sync latency, p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
| Log Store op duration seconds | `histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))` | `timeseries` | Write-ahead log operations latency at p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99` |
| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
# OpenDAL
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{ error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
# Metasrv
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `state-timeline` | Counter of region migration by source and destination | `prometheus` | `none` | `from-datanode-{{datanode_id}}` |
| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `prometheus` | `none` | `__auto` |
| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `__auto` |
# Flownode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | `prometheus` | -- | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-p95` |
| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |
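
The Metasrv and Flownode expressions can be spot-checked the same way; `promtool` (shipped with Prometheus) is a convenient alternative to raw HTTP calls. This is a minimal sketch under the same assumptions: the server URL is a placeholder and `$__rate_interval` is replaced by a fixed window.

```bash
#!/usr/bin/env bash
# Illustrative only: evaluate a few of the panel expressions with promtool.
PROM_URL="http://localhost:9090"

exprs=(
  'greptime_meta_region_migration_error'
  'greptime_flow_input_buf_size'
  'sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[5m]))'
)

for e in "${exprs[@]}"; do
  echo ">>> ${e}"
  promtool query instant "${PROM_URL}" "${e}"
done
```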

View File

@@ -1,769 +0,0 @@
groups:
- title: Overview
panels:
- title: Uptime
type: stat
description: The start time of GreptimeDB.
unit: s
queries:
- expr: time() - process_start_time_seconds
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Version
type: stat
description: GreptimeDB version.
queries:
- expr: SELECT pkg_version FROM information_schema.build_info
datasource:
type: mysql
uid: ${information_schema}
- title: Total Ingestion Rate
type: stat
description: Total ingestion rate.
unit: rowsps
queries:
- expr: sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Total Storage Size
type: stat
description: Total number of data file size.
unit: decbytes
queries:
- expr: select SUM(disk_size) from information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Total Rows
type: stat
description: Total number of data rows in the cluster. Calculated by sum of rows from each region.
unit: sishort
queries:
- expr: select SUM(region_rows) from information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Deployment
type: stat
description: The deployment topology of GreptimeDB.
queries:
- expr: SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';
datasource:
type: mysql
uid: ${information_schema}
- title: Database Resources
type: stat
description: The number of the key resources in GreptimeDB.
queries:
- expr: SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(region_id) as regions FROM information_schema.region_peers
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(*) as flows FROM information_schema.flows
datasource:
type: mysql
uid: ${information_schema}
- title: Data Size
type: stat
description: The data size of wal/index/manifest in the GreptimeDB.
unit: decbytes
queries:
- expr: SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT SUM(index_size) as index FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Ingestion
panels:
- title: Total Ingestion Rate
type: timeseries
description: |
Total ingestion rate.
Here we listed 3 primary protocols:
- Prometheus remote write
- Greptime's gRPC API (when using our ingest SDK)
- Log ingestion http API
unit: rowsps
queries:
- expr: sum(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: ingestion
- title: Ingestion Rate by Type
type: timeseries
description: |
Total ingestion rate.
Here we listed 3 primary protocols:
- Prometheus remote write
- Greptime's gRPC API (when using our ingest SDK)
- Log ingestion http API
unit: rowsps
queries:
- expr: sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: http-logs
- expr: sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: prometheus-remote-write
- title: Queries
panels:
- title: Total Query Rate
type: timeseries
description: |-
Total rate of query API calls by protocol. This metric is collected from frontends.
Here we listed 3 main protocols:
- MySQL
- Postgres
- Prometheus API
Note that there are some other minor query APIs like /sql are not included
unit: reqps
queries:
- expr: sum (rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: mysql
- expr: sum (rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: pg
- expr: sum (rate(greptime_servers_http_promql_elapsed_counte{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: promql
- title: Resources
panels:
- title: Datanode Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{ pod }}]'
- title: Datanode CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
- title: Metasrv Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
- title: Metasrv CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Flownode Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Flownode CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend Requests
panels:
- title: HTTP QPS per Instance
type: timeseries
description: HTTP QPS per Instance.
unit: reqps
queries:
- expr: sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{path!~"/health|/metrics"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]'
- title: HTTP P99 per Instance
type: timeseries
description: HTTP P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{path!~"/health|/metrics"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
- title: gRPC QPS per Instance
type: timeseries
description: gRPC QPS per Instance.
unit: reqps
queries:
- expr: sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]'
- title: gRPC P99 per Instance
type: timeseries
description: gRPC P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
- title: MySQL QPS per Instance
type: timeseries
description: MySQL QPS per Instance.
unit: reqps
queries:
- expr: sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: MySQL P99 per Instance
type: timeseries
description: MySQL P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-p99'
- title: PostgreSQL QPS per Instance
type: timeseries
description: PostgreSQL QPS per Instance.
unit: reqps
queries:
- expr: sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: PostgreSQL P99 per Instance
type: timeseries
description: PostgreSQL P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Frontend to Datanode
panels:
- title: Ingest Rows per Instance
type: timeseries
description: Ingestion rate by row as in each frontend
unit: rowsps
queries:
- expr: sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Region Call QPS per Instance
type: timeseries
description: Region Call QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
- title: Region Call P99 per Instance
type: timeseries
description: Region Call P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
- title: Mito Engine
panels:
- title: Request OPS per Instance
type: timeseries
description: Request QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Request P99 per Instance
type: timeseries
description: Request P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Write Buffer per Instance
type: timeseries
description: Write Buffer per Instance.
unit: decbytes
queries:
- expr: greptime_mito_write_buffer_bytes{}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Write Rows per Instance
type: timeseries
description: Ingestion size by row counts.
unit: rowsps
queries:
- expr: sum by (instance, pod) (rate(greptime_mito_write_rows_total{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Flush OPS per Instance
type: timeseries
description: Flush QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{reason}}]'
- title: Write Stall per Instance
type: timeseries
description: Write Stall per Instance.
queries:
- expr: sum by(instance, pod) (greptime_mito_write_stall_total{})
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Read Stage OPS per Instance
type: timeseries
description: Read Stage OPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{ stage="total"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Read Stage P99 per Instance
type: timeseries
description: Read Stage P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
- title: Write Stage P99 per Instance
type: timeseries
description: Write Stage P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
- title: Compaction OPS per Instance
type: timeseries
description: Compaction OPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{pod}}]'
- title: Compaction P99 per Instance by Stage
type: timeseries
description: Compaction latency by stage
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
- title: Compaction P99 per Instance
type: timeseries
description: Compaction P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction'
- title: WAL write size
type: timeseries
description: Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate.
unit: bytes
queries:
- expr: histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p95'
- expr: histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p99'
- expr: sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-throughput'
- title: Cached Bytes per Instance
type: timeseries
description: Cached Bytes per Instance.
unit: decbytes
queries:
- expr: greptime_mito_cache_bytes{}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Inflight Compaction
type: timeseries
description: Ongoing compaction task count
unit: none
queries:
- expr: greptime_mito_inflight_compaction_count
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: WAL sync duration seconds
type: timeseries
description: Raft engine (local disk) log store sync latency, p99
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Log Store op duration seconds
type: timeseries
description: Write-ahead log operations latency at p99
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99'
- title: Inflight Flush
type: timeseries
description: Ongoing flush task count
unit: none
queries:
- expr: greptime_mito_inflight_flush_count
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: OpenDAL
panels:
- title: QPS per Instance
type: timeseries
description: QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Read QPS per Instance
type: timeseries
description: Read QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: Read P99 per Instance
type: timeseries
description: Read P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
- title: Write QPS per Instance
type: timeseries
description: Write QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
- title: Write P99 per Instance
type: timeseries
description: Write P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: List QPS per Instance
type: timeseries
description: List QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="list"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: List P99 per Instance
type: timeseries
description: List P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="list"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: Other Requests per Instance
type: timeseries
description: Other Requests per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{operation!~"read|write|list|stat"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Other Request P99 per Instance
type: timeseries
description: Other Request P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read|write|list"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Opendal traffic
type: timeseries
description: Total traffic as in bytes by instance and operation
unit: decbytes
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: OpenDAL errors per Instance
type: timeseries
description: OpenDAL error counts per Instance.
queries:
- expr: sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{ error!="NotFound"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
- title: Metasrv
panels:
- title: Region migration datanode
type: state-timeline
description: Counter of region migration by source and destination
unit: none
queries:
- expr: greptime_meta_region_migration_stat{datanode_type="src"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: from-datanode-{{datanode_id}}
- expr: greptime_meta_region_migration_stat{datanode_type="desc"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: to-datanode-{{datanode_id}}
- title: Region migration error
type: timeseries
description: Counter of region migration error
unit: none
queries:
- expr: greptime_meta_region_migration_error
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Datanode load
type: timeseries
description: Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads.
unit: none
queries:
- expr: greptime_datanode_load
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Flownode
panels:
- title: Flow Ingest / Output Rate
type: timeseries
description: Flow Ingest / Output Rate.
queries:
- expr: sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{pod}}]-[{{instance}}]-[{{direction}}]'
- title: Flow Ingest Latency
type: timeseries
description: Flow Ingest Latency.
queries:
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p95'
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Flow Operation Latency
type: timeseries
description: Flow Operation Latency.
queries:
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p95'
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p99'
- title: Flow Buffer Size per Instance
type: timeseries
description: Flow Buffer Size per Instance.
queries:
- expr: greptime_flow_input_buf_size
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}]'
- title: Flow Processing Error per Instance
type: timeseries
description: Flow Processing Error per Instance.
queries:
- expr: sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'

File diff suppressed because it is too large

4159
grafana/greptimedb.json Normal file

File diff suppressed because it is too large

View File

@@ -1,54 +0,0 @@
#!/usr/bin/env bash
DASHBOARD_DIR=${1:-grafana/dashboards}
check_dashboard_description() {
for dashboard in $(find $DASHBOARD_DIR -name "*.json"); do
echo "Checking $dashboard description"
# Use jq to check for panels with empty or missing descriptions
invalid_panels=$(cat $dashboard | jq -r '
.panels[]
| select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))')
# Check if any invalid panels were found
if [[ -n "$invalid_panels" ]]; then
echo "Error: The following panels have empty or missing descriptions:"
echo "$invalid_panels"
exit 1
else
echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
fi
done
}
check_dashboards_generation() {
./grafana/scripts/gen-dashboards.sh
if [[ -n "$(git diff --name-only grafana/dashboards)" ]]; then
echo "Error: The dashboards are not generated correctly. You should execute the `make dashboards` command."
exit 1
fi
}
check_datasource() {
for dashboard in $(find $DASHBOARD_DIR -name "*.json"); do
echo "Checking $dashboard datasource"
jq -r '.panels[] | select(.type != "row") | .targets[] | [.datasource.type, .datasource.uid] | @tsv' $dashboard | while read -r type uid; do
# if the datasource is prometheus, check if the uid is ${metrics}
if [[ "$type" == "prometheus" && "$uid" != "\${metrics}" ]]; then
echo "Error: The datasource uid of $dashboard is not valid. It should be \${metrics}, got $uid"
exit 1
fi
# if the datasource is mysql, check if the uid is ${information_schema}
if [[ "$type" == "mysql" && "$uid" != "\${information_schema}" ]]; then
echo "Error: The datasource uid of $dashboard is not valid. It should be \${information_schema}, got $uid"
exit 1
fi
done
done
}
check_dashboards_generation
check_dashboard_description
check_datasource

View File

@@ -1,25 +0,0 @@
#! /usr/bin/env bash
CLUSTER_DASHBOARD_DIR=${1:-grafana/dashboards/cluster}
STANDALONE_DASHBOARD_DIR=${2:-grafana/dashboards/standalone}
DAC_IMAGE=ghcr.io/zyy17/dac:20250423-522bd35
remove_instance_filters() {
# Remove the instance filters for the standalone dashboards.
sed 's/instance=~\\"$datanode\\",//; s/instance=~\\"$datanode\\"//; s/instance=~\\"$frontend\\",//; s/instance=~\\"$frontend\\"//; s/instance=~\\"$metasrv\\",//; s/instance=~\\"$metasrv\\"//; s/instance=~\\"$flownode\\",//; s/instance=~\\"$flownode\\"//;' $CLUSTER_DASHBOARD_DIR/dashboard.json > $STANDALONE_DASHBOARD_DIR/dashboard.json
}
generate_intermediate_dashboards_and_docs() {
docker run -v ${PWD}:/greptimedb --rm ${DAC_IMAGE} \
-i /greptimedb/$CLUSTER_DASHBOARD_DIR/dashboard.json \
-o /greptimedb/$CLUSTER_DASHBOARD_DIR/dashboard.yaml \
-m /greptimedb/$CLUSTER_DASHBOARD_DIR/dashboard.md
docker run -v ${PWD}:/greptimedb --rm ${DAC_IMAGE} \
-i /greptimedb/$STANDALONE_DASHBOARD_DIR/dashboard.json \
-o /greptimedb/$STANDALONE_DASHBOARD_DIR/dashboard.yaml \
-m /greptimedb/$STANDALONE_DASHBOARD_DIR/dashboard.md
}
remove_instance_filters
generate_intermediate_dashboards_and_docs

11
grafana/summary.sh Executable file
View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
echo '| Title | Description | Expressions |
|---|---|---|'
cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels |
map(select(.type == "stat" or .type == "timeseries")) |
.[] | "| \(.title) | \(.description | gsub("\n"; "<br>")) | \(.targets | map(.expr // .rawSql | "`\(.|gsub("\n"; "<br>"))`") | join("<br>")) |"
'
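For reference, a hypothetical invocation of this script follows; the output path is only an example, the script itself just prints the table to stdout.
# Regenerate the panel summary table and save it alongside the dashboard JSON.
./grafana/summary.sh > grafana/summary.md
head -n 5 grafana/summary.md   # preview the generated markdown table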

View File

@@ -1,74 +0,0 @@
# Copyright 2023 Greptime Team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
from multiprocessing import Pool
def find_rust_files(directory):
rust_files = []
for root, _, files in os.walk(directory):
# Skip files with "test" in the path
if "test" in root.lower():
continue
for file in files:
# Skip files with "test" in the filename
if "test" in file.lower():
continue
if file.endswith(".rs"):
rust_files.append(os.path.join(root, file))
return rust_files
def check_file_for_super_import(file_path):
with open(file_path, "r") as file:
lines = file.readlines()
violations = []
for line_number, line in enumerate(lines, 1):
# Check for "use super::" without leading tab
if line.startswith("use super::"):
violations.append((line_number, line.strip()))
if violations:
return file_path, violations
return None
def main():
rust_files = find_rust_files(".")
with Pool() as pool:
results = pool.map(check_file_for_super_import, rust_files)
# Filter out None results
violations = [result for result in results if result]
if violations:
print("Found 'use super::' without leading tab in the following files:")
counter = 1
for file_path, file_violations in violations:
for line_number, line in file_violations:
print(f"{counter:>5} {file_path}:{line_number} - {line}")
counter += 1
raise SystemExit(1)
else:
print("No 'use super::' without leading tab found. All files are compliant.")
if __name__ == "__main__":
main()

View File

@@ -514,7 +514,6 @@ fn query_request_type(request: &QueryRequest) -> &'static str {
Some(Query::Sql(_)) => "query.sql", Some(Query::Sql(_)) => "query.sql",
Some(Query::LogicalPlan(_)) => "query.logical_plan", Some(Query::LogicalPlan(_)) => "query.logical_plan",
Some(Query::PromRangeQuery(_)) => "query.prom_range", Some(Query::PromRangeQuery(_)) => "query.prom_range",
Some(Query::InsertIntoPlan(_)) => "query.insert_into_plan",
None => "query.empty", None => "query.empty",
} }
} }

View File

@@ -49,6 +49,7 @@ pub use table_names::*;
use views::InformationSchemaViews; use views::InformationSchemaViews;
use self::columns::InformationSchemaColumns; use self::columns::InformationSchemaColumns;
use super::{SystemSchemaProviderInner, SystemTable, SystemTableRef};
use crate::error::{Error, Result}; use crate::error::{Error, Result};
use crate::system_schema::information_schema::cluster_info::InformationSchemaClusterInfo; use crate::system_schema::information_schema::cluster_info::InformationSchemaClusterInfo;
use crate::system_schema::information_schema::flows::InformationSchemaFlows; use crate::system_schema::information_schema::flows::InformationSchemaFlows;
@@ -62,9 +63,7 @@ use crate::system_schema::information_schema::table_constraints::InformationSche
use crate::system_schema::information_schema::tables::InformationSchemaTables; use crate::system_schema::information_schema::tables::InformationSchemaTables;
use crate::system_schema::memory_table::MemoryTable; use crate::system_schema::memory_table::MemoryTable;
pub(crate) use crate::system_schema::predicate::Predicates; pub(crate) use crate::system_schema::predicate::Predicates;
use crate::system_schema::{ use crate::system_schema::SystemSchemaProvider;
SystemSchemaProvider, SystemSchemaProviderInner, SystemTable, SystemTableRef,
};
use crate::CatalogManager; use crate::CatalogManager;
lazy_static! { lazy_static! {

View File

@@ -36,8 +36,9 @@ use datatypes::vectors::{
use snafu::ResultExt; use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::CLUSTER_INFO;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result}; use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::system_schema::information_schema::{InformationTable, Predicates, CLUSTER_INFO}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils; use crate::system_schema::utils;
use crate::CatalogManager; use crate::CatalogManager;

View File

@@ -38,11 +38,11 @@ use snafu::{OptionExt, ResultExt};
use sql::statements; use sql::statements;
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, COLUMNS};
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::information_schema::Predicates; use crate::information_schema::Predicates;
use crate::system_schema::information_schema::{InformationTable, COLUMNS};
use crate::CatalogManager; use crate::CatalogManager;
#[derive(Debug)] #[derive(Debug)]

View File

@@ -18,7 +18,7 @@ use common_catalog::consts::{METRIC_ENGINE, MITO_ENGINE};
use datatypes::schema::{Schema, SchemaRef}; use datatypes::schema::{Schema, SchemaRef};
use datatypes::vectors::{Int64Vector, StringVector, VectorRef}; use datatypes::vectors::{Int64Vector, StringVector, VectorRef};
use crate::system_schema::information_schema::table_names::*; use super::table_names::*;
use crate::system_schema::utils::tables::{ use crate::system_schema::utils::tables::{
bigint_column, string_column, string_columns, timestamp_micro_column, bigint_column, string_column, string_columns, timestamp_micro_column,
}; };

View File

@@ -24,17 +24,18 @@ use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatch
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream; use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream; use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, MutableVector, ScalarVectorBuilder, VectorRef}; use datatypes::prelude::{ConcreteDataType, MutableVector, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, FulltextBackend, Schema, SchemaRef}; use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value; use datatypes::value::Value;
use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, UInt32VectorBuilder}; use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, UInt32VectorBuilder};
use futures_util::TryStreamExt; use futures_util::TryStreamExt;
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::KEY_COLUMN_USAGE;
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::system_schema::information_schema::{InformationTable, Predicates, KEY_COLUMN_USAGE}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::CatalogManager; use crate::CatalogManager;
pub const CONSTRAINT_SCHEMA: &str = "constraint_schema"; pub const CONSTRAINT_SCHEMA: &str = "constraint_schema";
@@ -47,38 +48,20 @@ pub const TABLE_SCHEMA: &str = "table_schema";
pub const TABLE_NAME: &str = "table_name"; pub const TABLE_NAME: &str = "table_name";
pub const COLUMN_NAME: &str = "column_name"; pub const COLUMN_NAME: &str = "column_name";
pub const ORDINAL_POSITION: &str = "ordinal_position"; pub const ORDINAL_POSITION: &str = "ordinal_position";
/// The type of the index.
pub const GREPTIME_INDEX_TYPE: &str = "greptime_index_type";
const INIT_CAPACITY: usize = 42; const INIT_CAPACITY: usize = 42;
/// Time index constraint name
pub(crate) const CONSTRAINT_NAME_TIME_INDEX: &str = "TIME INDEX";
/// Primary key constraint name /// Primary key constraint name
pub(crate) const CONSTRAINT_NAME_PRI: &str = "PRIMARY"; pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY";
/// Primary key index type /// Time index constraint name
pub(crate) const INDEX_TYPE_PRI: &str = "greptime-primary-key-v1"; pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX";
/// Inverted index constraint name /// Inverted index constraint name
pub(crate) const CONSTRAINT_NAME_INVERTED_INDEX: &str = "INVERTED INDEX"; pub(crate) const INVERTED_INDEX_CONSTRAINT_NAME: &str = "INVERTED INDEX";
/// Inverted index type
pub(crate) const INDEX_TYPE_INVERTED_INDEX: &str = "greptime-inverted-index-v1";
/// Fulltext index constraint name /// Fulltext index constraint name
pub(crate) const CONSTRAINT_NAME_FULLTEXT_INDEX: &str = "FULLTEXT INDEX"; pub(crate) const FULLTEXT_INDEX_CONSTRAINT_NAME: &str = "FULLTEXT INDEX";
/// Fulltext index v1 type
pub(crate) const INDEX_TYPE_FULLTEXT_TANTIVY: &str = "greptime-fulltext-index-v1";
/// Fulltext index bloom type
pub(crate) const INDEX_TYPE_FULLTEXT_BLOOM: &str = "greptime-fulltext-index-bloom";
/// Skipping index constraint name /// Skipping index constraint name
pub(crate) const CONSTRAINT_NAME_SKIPPING_INDEX: &str = "SKIPPING INDEX"; pub(crate) const SKIPPING_INDEX_CONSTRAINT_NAME: &str = "SKIPPING INDEX";
/// Skipping index type
pub(crate) const INDEX_TYPE_SKIPPING_INDEX: &str = "greptime-bloom-filter-v1";
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`. /// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
///
/// Provides an extra column `greptime_index_type` for the index type of the key column.
#[derive(Debug)] #[derive(Debug)]
pub(super) struct InformationSchemaKeyColumnUsage { pub(super) struct InformationSchemaKeyColumnUsage {
schema: SchemaRef, schema: SchemaRef,
@@ -138,11 +121,6 @@ impl InformationSchemaKeyColumnUsage {
ConcreteDataType::string_datatype(), ConcreteDataType::string_datatype(),
true, true,
), ),
ColumnSchema::new(
GREPTIME_INDEX_TYPE,
ConcreteDataType::string_datatype(),
true,
),
])) ]))
} }
@@ -207,7 +185,6 @@ struct InformationSchemaKeyColumnUsageBuilder {
column_name: StringVectorBuilder, column_name: StringVectorBuilder,
ordinal_position: UInt32VectorBuilder, ordinal_position: UInt32VectorBuilder,
position_in_unique_constraint: UInt32VectorBuilder, position_in_unique_constraint: UInt32VectorBuilder,
greptime_index_type: StringVectorBuilder,
} }
impl InformationSchemaKeyColumnUsageBuilder { impl InformationSchemaKeyColumnUsageBuilder {
@@ -230,7 +207,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
column_name: StringVectorBuilder::with_capacity(INIT_CAPACITY), column_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
ordinal_position: UInt32VectorBuilder::with_capacity(INIT_CAPACITY), ordinal_position: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
position_in_unique_constraint: UInt32VectorBuilder::with_capacity(INIT_CAPACITY), position_in_unique_constraint: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
greptime_index_type: StringVectorBuilder::with_capacity(INIT_CAPACITY),
} }
} }
@@ -254,47 +230,34 @@ impl InformationSchemaKeyColumnUsageBuilder {
for (idx, column) in schema.column_schemas().iter().enumerate() { for (idx, column) in schema.column_schemas().iter().enumerate() {
let mut constraints = vec![]; let mut constraints = vec![];
let mut greptime_index_type = vec![];
if column.is_time_index() { if column.is_time_index() {
self.add_key_column_usage( self.add_key_column_usage(
&predicates, &predicates,
&schema_name, &schema_name,
CONSTRAINT_NAME_TIME_INDEX, TIME_INDEX_CONSTRAINT_NAME,
&catalog_name, &catalog_name,
&schema_name, &schema_name,
table_name, table_name,
&column.name, &column.name,
1, //always 1 for time index 1, //always 1 for time index
"",
); );
} }
// TODO(dimbtp): foreign key constraint not supported yet // TODO(dimbtp): foreign key constraint not supported yet
if keys.contains(&idx) { if keys.contains(&idx) {
constraints.push(CONSTRAINT_NAME_PRI); constraints.push(PRI_CONSTRAINT_NAME);
greptime_index_type.push(INDEX_TYPE_PRI);
} }
if column.is_inverted_indexed() { if column.is_inverted_indexed() {
constraints.push(CONSTRAINT_NAME_INVERTED_INDEX); constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);
greptime_index_type.push(INDEX_TYPE_INVERTED_INDEX);
} }
if let Ok(Some(options)) = column.fulltext_options() { if column.is_fulltext_indexed() {
if options.enable { constraints.push(FULLTEXT_INDEX_CONSTRAINT_NAME);
constraints.push(CONSTRAINT_NAME_FULLTEXT_INDEX);
let index_type = match options.backend {
FulltextBackend::Bloom => INDEX_TYPE_FULLTEXT_BLOOM,
FulltextBackend::Tantivy => INDEX_TYPE_FULLTEXT_TANTIVY,
};
greptime_index_type.push(index_type);
}
} }
if column.is_skipping_indexed() { if column.is_skipping_indexed() {
constraints.push(CONSTRAINT_NAME_SKIPPING_INDEX); constraints.push(SKIPPING_INDEX_CONSTRAINT_NAME);
greptime_index_type.push(INDEX_TYPE_SKIPPING_INDEX);
} }
if !constraints.is_empty() { if !constraints.is_empty() {
let aggregated_constraints = constraints.join(", "); let aggregated_constraints = constraints.join(", ");
let aggregated_index_types = greptime_index_type.join(", ");
self.add_key_column_usage( self.add_key_column_usage(
&predicates, &predicates,
&schema_name, &schema_name,
@@ -304,7 +267,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
table_name, table_name,
&column.name, &column.name,
idx as u32 + 1, idx as u32 + 1,
&aggregated_index_types,
); );
} }
} }
@@ -327,7 +289,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
table_name: &str, table_name: &str,
column_name: &str, column_name: &str,
ordinal_position: u32, ordinal_position: u32,
index_types: &str,
) { ) {
let row = [ let row = [
(CONSTRAINT_SCHEMA, &Value::from(constraint_schema)), (CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
@@ -337,7 +298,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
(TABLE_NAME, &Value::from(table_name)), (TABLE_NAME, &Value::from(table_name)),
(COLUMN_NAME, &Value::from(column_name)), (COLUMN_NAME, &Value::from(column_name)),
(ORDINAL_POSITION, &Value::from(ordinal_position)), (ORDINAL_POSITION, &Value::from(ordinal_position)),
(GREPTIME_INDEX_TYPE, &Value::from(index_types)),
]; ];
if !predicates.eval(&row) { if !predicates.eval(&row) {
@@ -354,7 +314,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
self.column_name.push(Some(column_name)); self.column_name.push(Some(column_name));
self.ordinal_position.push(Some(ordinal_position)); self.ordinal_position.push(Some(ordinal_position));
self.position_in_unique_constraint.push(None); self.position_in_unique_constraint.push(None);
self.greptime_index_type.push(Some(index_types));
} }
fn finish(&mut self) -> Result<RecordBatch> { fn finish(&mut self) -> Result<RecordBatch> {
@@ -378,7 +337,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
null_string_vector.clone(), null_string_vector.clone(),
null_string_vector.clone(), null_string_vector.clone(),
null_string_vector, null_string_vector,
Arc::new(self.greptime_index_type.finish()),
]; ];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu) RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
} }

View File

@@ -39,12 +39,13 @@ use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use table::metadata::{TableInfo, TableType}; use table::metadata::{TableInfo, TableType};
use super::PARTITIONS;
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, PartitionManagerNotFoundSnafu, CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, PartitionManagerNotFoundSnafu,
Result, UpgradeWeakCatalogManagerRefSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::kvbackend::KvBackendCatalogManager; use crate::kvbackend::KvBackendCatalogManager;
use crate::system_schema::information_schema::{InformationTable, Predicates, PARTITIONS}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::CatalogManager; use crate::CatalogManager;
const TABLE_CATALOG: &str = "table_catalog"; const TABLE_CATALOG: &str = "table_catalog";

View File

@@ -33,8 +33,9 @@ use datatypes::vectors::{StringVectorBuilder, TimestampMillisecondVectorBuilder}
use snafu::ResultExt; use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::PROCEDURE_INFO;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result}; use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::system_schema::information_schema::{InformationTable, Predicates, PROCEDURE_INFO}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils; use crate::system_schema::utils;
use crate::CatalogManager; use crate::CatalogManager;

View File

@@ -35,12 +35,13 @@ use snafu::{OptionExt, ResultExt};
use store_api::storage::{RegionId, ScanRequest, TableId}; use store_api::storage::{RegionId, ScanRequest, TableId};
use table::metadata::TableType; use table::metadata::TableType;
use super::REGION_PEERS;
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, FindRegionRoutesSnafu, InternalSnafu, Result, CreateRecordBatchSnafu, FindRegionRoutesSnafu, InternalSnafu, Result,
UpgradeWeakCatalogManagerRefSnafu, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::kvbackend::KvBackendCatalogManager; use crate::kvbackend::KvBackendCatalogManager;
use crate::system_schema::information_schema::{InformationTable, Predicates, REGION_PEERS}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::CatalogManager; use crate::CatalogManager;
pub const TABLE_CATALOG: &str = "table_catalog"; pub const TABLE_CATALOG: &str = "table_catalog";

View File

@@ -30,9 +30,9 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, UInt64VectorB
use snafu::ResultExt; use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, REGION_STATISTICS};
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result}; use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::information_schema::Predicates; use crate::information_schema::Predicates;
use crate::system_schema::information_schema::{InformationTable, REGION_STATISTICS};
use crate::system_schema::utils; use crate::system_schema::utils;
use crate::CatalogManager; use crate::CatalogManager;

View File

@@ -35,8 +35,8 @@ use itertools::Itertools;
use snafu::ResultExt; use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, RUNTIME_METRICS};
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result}; use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::system_schema::information_schema::{InformationTable, RUNTIME_METRICS};
#[derive(Debug)] #[derive(Debug)]
pub(super) struct InformationSchemaMetrics { pub(super) struct InformationSchemaMetrics {

View File

@@ -31,11 +31,12 @@ use datatypes::vectors::StringVectorBuilder;
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::SCHEMATA;
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, TableMetadataManagerSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, TableMetadataManagerSnafu,
UpgradeWeakCatalogManagerRefSnafu, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::system_schema::information_schema::{InformationTable, Predicates, SCHEMATA}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils; use crate::system_schema::utils;
use crate::CatalogManager; use crate::CatalogManager;

View File

@@ -32,14 +32,14 @@ use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, TABLE_CONSTRAINTS};
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::information_schema::key_column_usage::{ use crate::information_schema::key_column_usage::{
CONSTRAINT_NAME_PRI, CONSTRAINT_NAME_TIME_INDEX, PRI_CONSTRAINT_NAME, TIME_INDEX_CONSTRAINT_NAME,
}; };
use crate::information_schema::Predicates; use crate::information_schema::Predicates;
use crate::system_schema::information_schema::{InformationTable, TABLE_CONSTRAINTS};
use crate::CatalogManager; use crate::CatalogManager;
/// The `TABLE_CONSTRAINTS` table describes which tables have constraints. /// The `TABLE_CONSTRAINTS` table describes which tables have constraints.
@@ -188,7 +188,7 @@ impl InformationSchemaTableConstraintsBuilder {
self.add_table_constraint( self.add_table_constraint(
&predicates, &predicates,
&schema_name, &schema_name,
CONSTRAINT_NAME_TIME_INDEX, TIME_INDEX_CONSTRAINT_NAME,
&schema_name, &schema_name,
&table.table_info().name, &table.table_info().name,
TIME_INDEX_CONSTRAINT_TYPE, TIME_INDEX_CONSTRAINT_TYPE,
@@ -199,7 +199,7 @@ impl InformationSchemaTableConstraintsBuilder {
self.add_table_constraint( self.add_table_constraint(
&predicates, &predicates,
&schema_name, &schema_name,
CONSTRAINT_NAME_PRI, PRI_CONSTRAINT_NAME,
&schema_name, &schema_name,
&table.table_info().name, &table.table_info().name,
PRI_KEY_CONSTRAINT_TYPE, PRI_KEY_CONSTRAINT_TYPE,

View File

@@ -38,10 +38,11 @@ use snafu::{OptionExt, ResultExt};
use store_api::storage::{RegionId, ScanRequest, TableId}; use store_api::storage::{RegionId, ScanRequest, TableId};
use table::metadata::{TableInfo, TableType}; use table::metadata::{TableInfo, TableType};
use super::TABLES;
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::system_schema::information_schema::{InformationTable, Predicates, TABLES}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils; use crate::system_schema::utils;
use crate::CatalogManager; use crate::CatalogManager;

View File

@@ -32,12 +32,13 @@ use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use table::metadata::TableType; use table::metadata::TableType;
use super::VIEWS;
use crate::error::{ use crate::error::{
CastManagerSnafu, CreateRecordBatchSnafu, GetViewCacheSnafu, InternalSnafu, Result, CastManagerSnafu, CreateRecordBatchSnafu, GetViewCacheSnafu, InternalSnafu, Result,
UpgradeWeakCatalogManagerRefSnafu, ViewInfoNotFoundSnafu, UpgradeWeakCatalogManagerRefSnafu, ViewInfoNotFoundSnafu,
}; };
use crate::kvbackend::KvBackendCatalogManager; use crate::kvbackend::KvBackendCatalogManager;
use crate::system_schema::information_schema::{InformationTable, Predicates, VIEWS}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::CatalogManager; use crate::CatalogManager;
const INIT_CAPACITY: usize = 42; const INIT_CAPACITY: usize = 42;

View File

@@ -29,8 +29,8 @@ use datatypes::vectors::VectorRef;
use snafu::ResultExt; use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::SystemTable;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result}; use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::system_schema::SystemTable;
/// A memory table with specified schema and columns. /// A memory table with specified schema and columns.
#[derive(Debug)] #[derive(Debug)]

View File

@@ -34,9 +34,9 @@ use table::TableRef;
pub use table_names::*; pub use table_names::*;
use self::pg_namespace::oid_map::{PGNamespaceOidMap, PGNamespaceOidMapRef}; use self::pg_namespace::oid_map::{PGNamespaceOidMap, PGNamespaceOidMapRef};
use crate::system_schema::memory_table::MemoryTable; use super::memory_table::MemoryTable;
use crate::system_schema::utils::tables::u32_column; use super::utils::tables::u32_column;
use crate::system_schema::{SystemSchemaProvider, SystemSchemaProviderInner, SystemTableRef}; use super::{SystemSchemaProvider, SystemSchemaProviderInner, SystemTableRef};
use crate::CatalogManager; use crate::CatalogManager;
lazy_static! { lazy_static! {

View File

@@ -17,9 +17,9 @@ use std::sync::Arc;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::{Int16Vector, StringVector, UInt32Vector, VectorRef}; use datatypes::vectors::{Int16Vector, StringVector, UInt32Vector, VectorRef};
use super::oid_column;
use super::table_names::PG_TYPE;
use crate::memory_table_cols; use crate::memory_table_cols;
use crate::system_schema::pg_catalog::oid_column;
use crate::system_schema::pg_catalog::table_names::PG_TYPE;
use crate::system_schema::utils::tables::{i16_column, string_column}; use crate::system_schema::utils::tables::{i16_column, string_column};
fn pg_type_schema_columns() -> (Vec<ColumnSchema>, Vec<VectorRef>) { fn pg_type_schema_columns() -> (Vec<ColumnSchema>, Vec<VectorRef>) {


@@ -32,12 +32,12 @@ use snafu::{OptionExt, ResultExt};
use store_api::storage::ScanRequest; use store_api::storage::ScanRequest;
use table::metadata::TableType; use table::metadata::TableType;
use super::pg_namespace::oid_map::PGNamespaceOidMapRef;
use super::{query_ctx, OID_COLUMN_NAME, PG_CLASS};
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::information_schema::Predicates; use crate::information_schema::Predicates;
use crate::system_schema::pg_catalog::pg_namespace::oid_map::PGNamespaceOidMapRef;
use crate::system_schema::pg_catalog::{query_ctx, OID_COLUMN_NAME, PG_CLASS};
use crate::system_schema::utils::tables::{string_column, u32_column}; use crate::system_schema::utils::tables::{string_column, u32_column};
use crate::system_schema::SystemTable; use crate::system_schema::SystemTable;
use crate::CatalogManager; use crate::CatalogManager;


@@ -29,12 +29,12 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, VectorRef};
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use store_api::storage::ScanRequest; use store_api::storage::ScanRequest;
use super::pg_namespace::oid_map::PGNamespaceOidMapRef;
use super::{query_ctx, OID_COLUMN_NAME, PG_DATABASE};
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::information_schema::Predicates; use crate::information_schema::Predicates;
use crate::system_schema::pg_catalog::pg_namespace::oid_map::PGNamespaceOidMapRef;
use crate::system_schema::pg_catalog::{query_ctx, OID_COLUMN_NAME, PG_DATABASE};
use crate::system_schema::utils::tables::{string_column, u32_column}; use crate::system_schema::utils::tables::{string_column, u32_column};
use crate::system_schema::SystemTable; use crate::system_schema::SystemTable;
use crate::CatalogManager; use crate::CatalogManager;


@@ -35,13 +35,11 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, VectorRef};
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use store_api::storage::ScanRequest; use store_api::storage::ScanRequest;
use super::{query_ctx, PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE};
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::information_schema::Predicates; use crate::information_schema::Predicates;
use crate::system_schema::pg_catalog::{
query_ctx, PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE,
};
use crate::system_schema::utils::tables::{string_column, u32_column}; use crate::system_schema::utils::tables::{string_column, u32_column};
use crate::system_schema::SystemTable; use crate::system_schema::SystemTable;
use crate::CatalogManager; use crate::CatalogManager;


@@ -437,7 +437,10 @@ mod tests {
} }
fn column(name: &str) -> Expr { fn column(name: &str) -> Expr {
Expr::Column(Column::from_name(name)) Expr::Column(Column {
relation: None,
name: name.to_string(),
})
} }
fn string_literal(v: &str) -> Expr { fn string_literal(v: &str) -> Expr {


@@ -27,7 +27,7 @@ use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt}; use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableType; use table::metadata::TableType;
use table::table::adapter::DfTableProviderAdapter; use table::table::adapter::DfTableProviderAdapter;
pub mod dummy_catalog; mod dummy_catalog;
use dummy_catalog::DummyCatalogList; use dummy_catalog::DummyCatalogList;
use table::TableRef; use table::TableRef;


@@ -17,6 +17,7 @@ use std::any::Any;
use common_error::ext::{BoxedError, ErrorExt}; use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode; use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug; use common_macro::stack_trace_debug;
use rustyline::error::ReadlineError;
use snafu::{Location, Snafu}; use snafu::{Location, Snafu};
#[derive(Snafu)] #[derive(Snafu)]
@@ -104,6 +105,52 @@ pub enum Error {
#[snafu(display("Invalid REPL command: {reason}"))] #[snafu(display("Invalid REPL command: {reason}"))]
InvalidReplCommand { reason: String }, InvalidReplCommand { reason: String },
#[snafu(display("Cannot create REPL"))]
ReplCreation {
#[snafu(source)]
error: ReadlineError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Error reading command"))]
Readline {
#[snafu(source)]
error: ReadlineError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to request database, sql: {sql}"))]
RequestDatabase {
sql: String,
#[snafu(source)]
source: client::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to collect RecordBatches"))]
CollectRecordBatches {
#[snafu(implicit)]
location: Location,
source: common_recordbatch::error::Error,
},
#[snafu(display("Failed to pretty print Recordbatches"))]
PrettyPrintRecordBatches {
#[snafu(implicit)]
location: Location,
source: common_recordbatch::error::Error,
},
#[snafu(display("Failed to start Meta client"))]
StartMetaClient {
#[snafu(implicit)]
location: Location,
source: meta_client::error::Error,
},
#[snafu(display("Failed to parse SQL: {}", sql))] #[snafu(display("Failed to parse SQL: {}", sql))]
ParseSql { ParseSql {
sql: String, sql: String,
@@ -119,6 +166,13 @@ pub enum Error {
source: query::error::Error, source: query::error::Error,
}, },
#[snafu(display("Failed to encode logical plan in substrait"))]
SubstraitEncodeLogicalPlan {
#[snafu(implicit)]
location: Location,
source: substrait::error::Error,
},
#[snafu(display("Failed to load layered config"))] #[snafu(display("Failed to load layered config"))]
LoadLayeredConfig { LoadLayeredConfig {
#[snafu(source(from(common_config::error::Error, Box::new)))] #[snafu(source(from(common_config::error::Error, Box::new)))]
@@ -264,10 +318,17 @@ impl ErrorExt for Error {
Error::StartProcedureManager { source, .. } Error::StartProcedureManager { source, .. }
| Error::StopProcedureManager { source, .. } => source.status_code(), | Error::StopProcedureManager { source, .. } => source.status_code(),
Error::StartWalOptionsAllocator { source, .. } => source.status_code(), Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
Error::HttpQuerySql { .. } => StatusCode::Internal, Error::ReplCreation { .. } | Error::Readline { .. } | Error::HttpQuerySql { .. } => {
StatusCode::Internal
}
Error::RequestDatabase { source, .. } => source.status_code(),
Error::CollectRecordBatches { source, .. }
| Error::PrettyPrintRecordBatches { source, .. } => source.status_code(),
Error::StartMetaClient { source, .. } => source.status_code(),
Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => { Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => {
source.status_code() source.status_code()
} }
Error::SubstraitEncodeLogicalPlan { source, .. } => source.status_code(),
Error::SerdeJson { .. } Error::SerdeJson { .. }
| Error::FileIo { .. } | Error::FileIo { .. }


@@ -23,12 +23,15 @@ mod helper;
// Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373 // Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373
mod database; mod database;
mod import; mod import;
#[allow(unused)]
mod repl;
use async_trait::async_trait; use async_trait::async_trait;
use clap::Parser; use clap::Parser;
use common_error::ext::BoxedError; use common_error::ext::BoxedError;
pub use database::DatabaseClient; pub use database::DatabaseClient;
use error::Result; use error::Result;
pub use repl::Repl;
pub use crate::bench::BenchTableMetadataCommand; pub use crate::bench::BenchTableMetadataCommand;
pub use crate::export::ExportCommand; pub use crate::export::ExportCommand;

src/cli/src/repl.rs (new file, 299 lines)

@@ -0,0 +1,299 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Instant;
use cache::{
build_fundamental_cache_registry, with_default_composite_cache_registry, TABLE_CACHE_NAME,
TABLE_ROUTE_CACHE_NAME,
};
use catalog::information_extension::DistributedInformationExtension;
use catalog::kvbackend::{
CachedKvBackend, CachedKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend,
};
use client::{Client, Database, OutputData, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_base::Plugins;
use common_config::Mode;
use common_error::ext::ErrorExt;
use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
use common_meta::kv_backend::KvBackendRef;
use common_query::Output;
use common_recordbatch::RecordBatches;
use common_telemetry::debug;
use either::Either;
use meta_client::client::{ClusterKvBackend, MetaClientBuilder};
use query::datafusion::DatafusionQueryEngine;
use query::parser::QueryLanguageParser;
use query::query_engine::{DefaultSerializer, QueryEngineState};
use query::QueryEngine;
use rustyline::error::ReadlineError;
use rustyline::Editor;
use session::context::QueryContext;
use snafu::{OptionExt, ResultExt};
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use crate::cmd::ReplCommand;
use crate::error::{
CollectRecordBatchesSnafu, ParseSqlSnafu, PlanStatementSnafu, PrettyPrintRecordBatchesSnafu,
ReadlineSnafu, ReplCreationSnafu, RequestDatabaseSnafu, Result, StartMetaClientSnafu,
SubstraitEncodeLogicalPlanSnafu,
};
use crate::helper::RustylineHelper;
use crate::{error, AttachCommand};
/// Captures the state of the repl, gathers commands and executes them one by one
pub struct Repl {
/// Rustyline editor for interacting with user on command line
rl: Editor<RustylineHelper>,
/// Current prompt
prompt: String,
/// Client for interacting with GreptimeDB
database: Database,
query_engine: Option<DatafusionQueryEngine>,
}
#[allow(clippy::print_stdout)]
impl Repl {
fn print_help(&self) {
println!("{}", ReplCommand::help())
}
pub(crate) async fn try_new(cmd: &AttachCommand) -> Result<Self> {
let mut rl = Editor::new().context(ReplCreationSnafu)?;
if !cmd.disable_helper {
rl.set_helper(Some(RustylineHelper::default()));
let history_file = history_file();
if let Err(e) = rl.load_history(&history_file) {
debug!(
"failed to load history file on {}, error: {e}",
history_file.display()
);
}
}
let client = Client::with_urls([&cmd.grpc_addr]);
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
let query_engine = if let Some(meta_addr) = &cmd.meta_addr {
create_query_engine(meta_addr).await.map(Some)?
} else {
None
};
Ok(Self {
rl,
prompt: "> ".to_string(),
database,
query_engine,
})
}
/// Parse the next command
fn next_command(&mut self) -> Result<ReplCommand> {
match self.rl.readline(&self.prompt) {
Ok(ref line) => {
let request = line.trim();
let _ = self.rl.add_history_entry(request.to_string());
request.try_into()
}
Err(ReadlineError::Eof) | Err(ReadlineError::Interrupted) => Ok(ReplCommand::Exit),
// Some sort of real underlying error
Err(e) => Err(e).context(ReadlineSnafu),
}
}
/// Read Evaluate Print Loop (interactive command line) for GreptimeDB
///
/// Inspired / based on repl.rs from InfluxDB IOX
pub(crate) async fn run(&mut self) -> Result<()> {
println!("Ready for commands. (Hint: try 'help')");
loop {
match self.next_command()? {
ReplCommand::Help => {
self.print_help();
}
ReplCommand::UseDatabase { db_name } => {
if self.execute_sql(format!("USE {db_name}")).await {
println!("Using {db_name}");
self.database.set_schema(&db_name);
self.prompt = format!("[{db_name}] > ");
}
}
ReplCommand::Sql { sql } => {
let _ = self.execute_sql(sql).await;
}
ReplCommand::Exit => {
return Ok(());
}
}
}
}
async fn execute_sql(&self, sql: String) -> bool {
self.do_execute_sql(sql)
.await
.map_err(|e| {
let status_code = e.status_code();
let root_cause = e.output_msg();
println!("Error: {}({status_code}), {root_cause}", status_code as u32)
})
.is_ok()
}
async fn do_execute_sql(&self, sql: String) -> Result<()> {
let start = Instant::now();
let output = if let Some(query_engine) = &self.query_engine {
let query_ctx = Arc::new(QueryContext::with(
self.database.catalog(),
self.database.schema(),
));
let stmt = QueryLanguageParser::parse_sql(&sql, &query_ctx)
.with_context(|_| ParseSqlSnafu { sql: sql.clone() })?;
let plan = query_engine
.planner()
.plan(&stmt, query_ctx.clone())
.await
.context(PlanStatementSnafu)?;
let plan = query_engine
.optimize(&query_engine.engine_context(query_ctx), &plan)
.context(PlanStatementSnafu)?;
let plan = DFLogicalSubstraitConvertor {}
.encode(&plan, DefaultSerializer)
.context(SubstraitEncodeLogicalPlanSnafu)?;
self.database.logical_plan(plan.to_vec()).await
} else {
self.database.sql(&sql).await
}
.context(RequestDatabaseSnafu { sql: &sql })?;
let either = match output.data {
OutputData::Stream(s) => {
let x = RecordBatches::try_collect(s)
.await
.context(CollectRecordBatchesSnafu)?;
Either::Left(x)
}
OutputData::RecordBatches(x) => Either::Left(x),
OutputData::AffectedRows(rows) => Either::Right(rows),
};
let end = Instant::now();
match either {
Either::Left(recordbatches) => {
let total_rows: usize = recordbatches.iter().map(|x| x.num_rows()).sum();
if total_rows > 0 {
println!(
"{}",
recordbatches
.pretty_print()
.context(PrettyPrintRecordBatchesSnafu)?
);
}
println!("Total Rows: {total_rows}")
}
Either::Right(rows) => println!("Affected Rows: {rows}"),
};
println!("Cost {} ms", (end - start).as_millis());
Ok(())
}
}
impl Drop for Repl {
fn drop(&mut self) {
if self.rl.helper().is_some() {
let history_file = history_file();
if let Err(e) = self.rl.save_history(&history_file) {
debug!(
"failed to save history file on {}, error: {e}",
history_file.display()
);
}
}
}
}
/// Return the location of the history file (defaults to $HOME/".greptimedb_cli_history")
fn history_file() -> PathBuf {
let mut buf = match std::env::var("HOME") {
Ok(home) => PathBuf::from(home),
Err(_) => PathBuf::new(),
};
buf.push(".greptimedb_cli_history");
buf
}
async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
let mut meta_client = MetaClientBuilder::default().enable_store().build();
meta_client
.start([meta_addr])
.await
.context(StartMetaClientSnafu)?;
let meta_client = Arc::new(meta_client);
let cached_meta_backend = Arc::new(
CachedKvBackendBuilder::new(Arc::new(MetaKvBackend::new(meta_client.clone()))).build(),
);
let layered_cache_builder = LayeredCacheRegistryBuilder::default().add_cache_registry(
CacheRegistryBuilder::default()
.add_cache(cached_meta_backend.clone())
.build(),
);
let fundamental_cache_registry =
build_fundamental_cache_registry(Arc::new(MetaKvBackend::new(meta_client.clone())));
let layered_cache_registry = Arc::new(
with_default_composite_cache_registry(
layered_cache_builder.add_cache_registry(fundamental_cache_registry),
)
.context(error::BuildCacheRegistrySnafu)?
.build(),
);
let information_extension = Arc::new(DistributedInformationExtension::new(meta_client.clone()));
let catalog_manager = KvBackendCatalogManager::new(
information_extension,
cached_meta_backend.clone(),
layered_cache_registry,
None,
);
let plugins: Plugins = Default::default();
let state = Arc::new(QueryEngineState::new(
catalog_manager,
None,
None,
None,
None,
false,
plugins.clone(),
));
Ok(DatafusionQueryEngine::new(state, plugins))
}
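
For orientation, a minimal sketch (not part of the diff) of how this REPL is driven end to end; `Repl::try_new` and `Repl::run` are the methods added above, while the `attach` function name and the error plumbing are assumptions:

    // Hypothetical entry point; `AttachCommand` carries `grpc_addr`,
    // `meta_addr` and `disable_helper`, as read by `Repl::try_new` above.
    pub async fn attach(cmd: AttachCommand) -> Result<()> {
        let mut repl = Repl::try_new(&cmd).await?;
        // Blocks in the read-evaluate-print loop until the user exits.
        repl.run().await
    }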


@@ -16,7 +16,6 @@ arc-swap = "1.6"
arrow-flight.workspace = true arrow-flight.workspace = true
async-stream.workspace = true async-stream.workspace = true
async-trait.workspace = true async-trait.workspace = true
base64.workspace = true
common-catalog.workspace = true common-catalog.workspace = true
common-error.workspace = true common-error.workspace = true
common-grpc.workspace = true common-grpc.workspace = true
@@ -26,7 +25,6 @@ common-query.workspace = true
common-recordbatch.workspace = true common-recordbatch.workspace = true
common-telemetry.workspace = true common-telemetry.workspace = true
enum_dispatch = "0.3" enum_dispatch = "0.3"
futures.workspace = true
futures-util.workspace = true futures-util.workspace = true
lazy_static.workspace = true lazy_static.workspace = true
moka = { workspace = true, features = ["future"] } moka = { workspace = true, features = ["future"] }


@@ -12,49 +12,36 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use std::pin::Pin;
use std::str::FromStr;
use api::v1::auth_header::AuthScheme; use api::v1::auth_header::AuthScheme;
use api::v1::ddl_request::Expr as DdlExpr; use api::v1::ddl_request::Expr as DdlExpr;
use api::v1::greptime_database_client::GreptimeDatabaseClient; use api::v1::greptime_database_client::GreptimeDatabaseClient;
use api::v1::greptime_request::Request; use api::v1::greptime_request::Request;
use api::v1::query_request::Query; use api::v1::query_request::Query;
use api::v1::{ use api::v1::{
AlterTableExpr, AuthHeader, Basic, CreateTableExpr, DdlRequest, GreptimeRequest, AlterTableExpr, AuthHeader, CreateTableExpr, DdlRequest, GreptimeRequest, InsertRequests,
InsertRequests, QueryRequest, RequestHeader, QueryRequest, RequestHeader,
}; };
use arrow_flight::{FlightData, Ticket}; use arrow_flight::Ticket;
use async_stream::stream; use async_stream::stream;
use base64::prelude::BASE64_STANDARD;
use base64::Engine;
use common_catalog::build_db_string;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::ext::{BoxedError, ErrorExt}; use common_error::ext::{BoxedError, ErrorExt};
use common_grpc::flight::do_put::DoPutResponse;
use common_grpc::flight::{FlightDecoder, FlightMessage}; use common_grpc::flight::{FlightDecoder, FlightMessage};
use common_query::Output; use common_query::Output;
use common_recordbatch::error::ExternalSnafu; use common_recordbatch::error::ExternalSnafu;
use common_recordbatch::RecordBatchStreamWrapper; use common_recordbatch::RecordBatchStreamWrapper;
use common_telemetry::error; use common_telemetry::error;
use common_telemetry::tracing_context::W3cTrace; use common_telemetry::tracing_context::W3cTrace;
use futures::future; use futures_util::StreamExt;
use futures_util::{Stream, StreamExt, TryStreamExt};
use prost::Message; use prost::Message;
use snafu::{ensure, ResultExt}; use snafu::{ensure, ResultExt};
use tonic::metadata::{AsciiMetadataKey, MetadataValue}; use tonic::metadata::AsciiMetadataKey;
use tonic::transport::Channel; use tonic::transport::Channel;
use crate::error::{ use crate::error::{
ConvertFlightDataSnafu, Error, FlightGetSnafu, IllegalFlightMessagesSnafu, InvalidAsciiSnafu, ConvertFlightDataSnafu, Error, FlightGetSnafu, IllegalFlightMessagesSnafu, InvalidAsciiSnafu,
InvalidTonicMetadataValueSnafu, ServerSnafu, ServerSnafu,
}; };
use crate::{from_grpc_response, Client, Result}; use crate::{from_grpc_response, Client, Result};
type FlightDataStream = Pin<Box<dyn Stream<Item = FlightData> + Send>>;
type DoPutResponseStream = Pin<Box<dyn Stream<Item = Result<DoPutResponse>>>>;
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub struct Database { pub struct Database {
// The "catalog" and "schema" to be used in processing the requests at the server side. // The "catalog" and "schema" to be used in processing the requests at the server side.
@@ -121,24 +108,16 @@ impl Database {
self.catalog = catalog.into(); self.catalog = catalog.into();
} }
fn catalog_or_default(&self) -> &str { pub fn catalog(&self) -> &String {
if self.catalog.is_empty() { &self.catalog
DEFAULT_CATALOG_NAME
} else {
&self.catalog
}
} }
pub fn set_schema(&mut self, schema: impl Into<String>) { pub fn set_schema(&mut self, schema: impl Into<String>) {
self.schema = schema.into(); self.schema = schema.into();
} }
fn schema_or_default(&self) -> &str { pub fn schema(&self) -> &String {
if self.schema.is_empty() { &self.schema
DEFAULT_SCHEMA_NAME
} else {
&self.schema
}
} }
pub fn set_timezone(&mut self, timezone: impl Into<String>) { pub fn set_timezone(&mut self, timezone: impl Into<String>) {
@@ -185,7 +164,7 @@ impl Database {
from_grpc_response(response) from_grpc_response(response)
} }
pub async fn handle(&self, request: Request) -> Result<u32> { async fn handle(&self, request: Request) -> Result<u32> {
let mut client = make_database_client(&self.client)?.inner; let mut client = make_database_client(&self.client)?.inner;
let request = self.to_rpc_request(request); let request = self.to_rpc_request(request);
let response = client.handle(request).await?.into_inner(); let response = client.handle(request).await?.into_inner();
@@ -331,41 +310,6 @@ impl Database {
} }
} }
} }
/// Ingest a stream of [RecordBatch]es that belong to a table, using Arrow Flight's "`DoPut`"
/// method. The return value is also a stream, producing [DoPutResponse]s.
pub async fn do_put(&self, stream: FlightDataStream) -> Result<DoPutResponseStream> {
let mut request = tonic::Request::new(stream);
if let Some(AuthHeader {
auth_scheme: Some(AuthScheme::Basic(Basic { username, password })),
}) = &self.ctx.auth_header
{
let encoded = BASE64_STANDARD.encode(format!("{username}:{password}"));
let value =
MetadataValue::from_str(&encoded).context(InvalidTonicMetadataValueSnafu)?;
request.metadata_mut().insert("x-greptime-auth", value);
}
let db_to_put = if !self.dbname.is_empty() {
&self.dbname
} else {
&build_db_string(self.catalog_or_default(), self.schema_or_default())
};
request.metadata_mut().insert(
"x-greptime-db-name",
MetadataValue::from_str(db_to_put).context(InvalidTonicMetadataValueSnafu)?,
);
let mut client = self.client.make_flight_client()?;
let response = client.mut_inner().do_put(request).await?;
let response = response
.into_inner()
.map_err(Into::into)
.and_then(|x| future::ready(DoPutResponse::try_from(x).context(ConvertFlightDataSnafu)))
.boxed();
Ok(response)
}
} }
#[derive(Default, Debug, Clone)] #[derive(Default, Debug, Clone)]


@@ -15,11 +15,10 @@
use std::any::Any; use std::any::Any;
use common_error::ext::{BoxedError, ErrorExt}; use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::{convert_tonic_code_to_status_code, StatusCode}; use common_error::status_code::StatusCode;
use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG}; use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG};
use common_macro::stack_trace_debug; use common_macro::stack_trace_debug;
use snafu::{location, Location, Snafu}; use snafu::{location, Location, Snafu};
use tonic::metadata::errors::InvalidMetadataValue;
use tonic::{Code, Status}; use tonic::{Code, Status};
#[derive(Snafu)] #[derive(Snafu)]
@@ -116,14 +115,6 @@ pub enum Error {
#[snafu(implicit)] #[snafu(implicit)]
location: Location, location: Location,
}, },
#[snafu(display("Invalid Tonic metadata value"))]
InvalidTonicMetadataValue {
#[snafu(source)]
error: InvalidMetadataValue,
#[snafu(implicit)]
location: Location,
},
} }
pub type Result<T> = std::result::Result<T, Error>; pub type Result<T> = std::result::Result<T, Error>;
@@ -144,9 +135,7 @@ impl ErrorExt for Error {
| Error::CreateTlsChannel { source, .. } => source.status_code(), | Error::CreateTlsChannel { source, .. } => source.status_code(),
Error::IllegalGrpcClientState { .. } => StatusCode::Unexpected, Error::IllegalGrpcClientState { .. } => StatusCode::Unexpected,
Error::InvalidAscii { .. } | Error::InvalidTonicMetadataValue { .. } => { Error::InvalidAscii { .. } => StatusCode::InvalidArguments,
StatusCode::InvalidArguments
}
} }
} }
@@ -163,15 +152,15 @@ impl From<Status> for Error {
.and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok()) .and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
} }
let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE).and_then(|s| { let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE)
if let Ok(code) = s.parse::<u32>() { .and_then(|s| {
StatusCode::from_u32(code) if let Ok(code) = s.parse::<u32>() {
} else { StatusCode::from_u32(code)
None } else {
} None
}); }
let tonic_code = e.code(); })
let code = code.unwrap_or_else(|| convert_tonic_code_to_status_code(tonic_code)); .unwrap_or(StatusCode::Unknown);
let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG) let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG)
.unwrap_or_else(|| e.message().to_string()); .unwrap_or_else(|| e.message().to_string());
@@ -198,6 +187,9 @@ impl Error {
} | Self::RegionServer { } | Self::RegionServer {
code: Code::Unavailable, code: Code::Unavailable,
.. ..
} | Self::RegionServer {
code: Code::Unknown,
..
} }
) )
} }

View File

@@ -16,7 +16,7 @@
mod client; mod client;
pub mod client_manager; pub mod client_manager;
pub mod database; mod database;
pub mod error; pub mod error;
pub mod flow; pub mod flow;
pub mod load_balance; pub mod load_balance;


@@ -201,11 +201,12 @@ impl RegionRequester {
.await .await
.map_err(|e| { .map_err(|e| {
let code = e.code(); let code = e.code();
let err: error::Error = e.into();
// Uses `Error::RegionServer` instead of `Error::Server` // Uses `Error::RegionServer` instead of `Error::Server`
error::Error::RegionServer { error::Error::RegionServer {
addr, addr,
code, code,
source: BoxedError::new(error::Error::from(e)), source: BoxedError::new(err),
location: location!(), location: location!(),
} }
})? })?


@@ -68,6 +68,7 @@ query.workspace = true
rand.workspace = true rand.workspace = true
regex.workspace = true regex.workspace = true
reqwest.workspace = true reqwest.workspace = true
rustyline = "10.1"
serde.workspace = true serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
servers.workspace = true servers.workspace = true


@@ -17,6 +17,7 @@ use std::any::Any;
use common_error::ext::{BoxedError, ErrorExt}; use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode; use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug; use common_macro::stack_trace_debug;
use rustyline::error::ReadlineError;
use snafu::{Location, Snafu}; use snafu::{Location, Snafu};
#[derive(Snafu)] #[derive(Snafu)]
@@ -180,6 +181,52 @@ pub enum Error {
#[snafu(display("Invalid REPL command: {reason}"))] #[snafu(display("Invalid REPL command: {reason}"))]
InvalidReplCommand { reason: String }, InvalidReplCommand { reason: String },
#[snafu(display("Cannot create REPL"))]
ReplCreation {
#[snafu(source)]
error: ReadlineError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Error reading command"))]
Readline {
#[snafu(source)]
error: ReadlineError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to request database, sql: {sql}"))]
RequestDatabase {
sql: String,
#[snafu(source)]
source: client::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to collect RecordBatches"))]
CollectRecordBatches {
#[snafu(implicit)]
location: Location,
source: common_recordbatch::error::Error,
},
#[snafu(display("Failed to pretty print Recordbatches"))]
PrettyPrintRecordBatches {
#[snafu(implicit)]
location: Location,
source: common_recordbatch::error::Error,
},
#[snafu(display("Failed to start Meta client"))]
StartMetaClient {
#[snafu(implicit)]
location: Location,
source: meta_client::error::Error,
},
#[snafu(display("Failed to parse SQL: {}", sql))] #[snafu(display("Failed to parse SQL: {}", sql))]
ParseSql { ParseSql {
sql: String, sql: String,
@@ -195,6 +242,13 @@ pub enum Error {
source: query::error::Error, source: query::error::Error,
}, },
#[snafu(display("Failed to encode logical plan in substrait"))]
SubstraitEncodeLogicalPlan {
#[snafu(implicit)]
location: Location,
source: substrait::error::Error,
},
#[snafu(display("Failed to load layered config"))] #[snafu(display("Failed to load layered config"))]
LoadLayeredConfig { LoadLayeredConfig {
#[snafu(source(from(common_config::error::Error, Box::new)))] #[snafu(source(from(common_config::error::Error, Box::new)))]
@@ -341,10 +395,17 @@ impl ErrorExt for Error {
| Error::StopProcedureManager { source, .. } => source.status_code(), | Error::StopProcedureManager { source, .. } => source.status_code(),
Error::BuildWalOptionsAllocator { source, .. } Error::BuildWalOptionsAllocator { source, .. }
| Error::StartWalOptionsAllocator { source, .. } => source.status_code(), | Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
Error::HttpQuerySql { .. } => StatusCode::Internal, Error::ReplCreation { .. } | Error::Readline { .. } | Error::HttpQuerySql { .. } => {
StatusCode::Internal
}
Error::RequestDatabase { source, .. } => source.status_code(),
Error::CollectRecordBatches { source, .. }
| Error::PrettyPrintRecordBatches { source, .. } => source.status_code(),
Error::StartMetaClient { source, .. } => source.status_code(),
Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => { Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => {
source.status_code() source.status_code()
} }
Error::SubstraitEncodeLogicalPlan { source, .. } => source.status_code(),
Error::SerdeJson { .. } Error::SerdeJson { .. }
| Error::FileIo { .. } | Error::FileIo { .. }


@@ -32,9 +32,7 @@ use common_meta::key::TableMetadataManager;
use common_telemetry::info; use common_telemetry::info;
use common_telemetry::logging::TracingOptions; use common_telemetry::logging::TracingOptions;
use common_version::{short_version, version}; use common_version::{short_version, version};
use flow::{ use flow::{FlownodeBuilder, FlownodeInstance, FrontendInvoker};
FlownodeBuilder, FlownodeInstance, FlownodeServiceBuilder, FrontendClient, FrontendInvoker,
};
use meta_client::{MetaClientOptions, MetaClientType}; use meta_client::{MetaClientOptions, MetaClientType};
use snafu::{ensure, OptionExt, ResultExt}; use snafu::{ensure, OptionExt, ResultExt};
use tracing_appender::non_blocking::WorkerGuard; use tracing_appender::non_blocking::WorkerGuard;
@@ -315,26 +313,16 @@ impl StartCommand {
); );
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone())); let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
let frontend_client = FrontendClient::from_meta_client(meta_client.clone());
let flownode_builder = FlownodeBuilder::new( let flownode_builder = FlownodeBuilder::new(
opts.clone(), opts,
Plugins::new(), Plugins::new(),
table_metadata_manager, table_metadata_manager,
catalog_manager.clone(), catalog_manager.clone(),
flow_metadata_manager, flow_metadata_manager,
Arc::new(frontend_client),
) )
.with_heartbeat_task(heartbeat_task); .with_heartbeat_task(heartbeat_task);
let mut flownode = flownode_builder.build().await.context(StartFlownodeSnafu)?; let flownode = flownode_builder.build().await.context(StartFlownodeSnafu)?;
let services = FlownodeServiceBuilder::new(&opts)
.with_grpc_server(flownode.flownode_server().clone())
.enable_http_service()
.build()
.await
.context(StartFlownodeSnafu)?;
flownode.setup_services(services);
let flownode = flownode;
// flownode's frontend to datanode need not timeout. // flownode's frontend to datanode need not timeout.
// Some queries are expected to take long time. // Some queries are expected to take long time.
@@ -345,7 +333,7 @@ impl StartCommand {
let client = Arc::new(NodeClients::new(channel_config)); let client = Arc::new(NodeClients::new(channel_config));
let invoker = FrontendInvoker::build_from( let invoker = FrontendInvoker::build_from(
flownode.flow_engine().streaming_engine(), flownode.flow_worker_manager().clone(),
catalog_manager.clone(), catalog_manager.clone(),
cached_meta_backend.clone(), cached_meta_backend.clone(),
layered_cache_registry.clone(), layered_cache_registry.clone(),
@@ -355,9 +343,7 @@ impl StartCommand {
.await .await
.context(StartFlownodeSnafu)?; .context(StartFlownodeSnafu)?;
flownode flownode
.flow_engine() .flow_worker_manager()
.streaming_engine()
// TODO(discord9): refactor and avoid circular reference
.set_frontend_invoker(invoker) .set_frontend_invoker(invoker)
.await; .await;


@@ -132,7 +132,7 @@ impl SubCommand {
} }
#[derive(Debug, Default, Parser)] #[derive(Debug, Default, Parser)]
pub struct StartCommand { struct StartCommand {
/// The address to bind the gRPC server. /// The address to bind the gRPC server.
#[clap(long, alias = "bind-addr")] #[clap(long, alias = "bind-addr")]
rpc_bind_addr: Option<String>, rpc_bind_addr: Option<String>,
@@ -172,7 +172,7 @@ pub struct StartCommand {
} }
impl StartCommand { impl StartCommand {
pub fn load_options(&self, global_options: &GlobalOptions) -> Result<MetasrvOptions> { fn load_options(&self, global_options: &GlobalOptions) -> Result<MetasrvOptions> {
let mut opts = MetasrvOptions::load_layered_options( let mut opts = MetasrvOptions::load_layered_options(
self.config_file.as_deref(), self.config_file.as_deref(),
self.env_prefix.as_ref(), self.env_prefix.as_ref(),
@@ -261,7 +261,7 @@ impl StartCommand {
Ok(()) Ok(())
} }
pub async fn build(&self, opts: MetasrvOptions) -> Result<Instance> { async fn build(&self, opts: MetasrvOptions) -> Result<Instance> {
common_runtime::init_global_runtimes(&opts.runtime); common_runtime::init_global_runtimes(&opts.runtime);
let guard = common_telemetry::init_global_logging( let guard = common_telemetry::init_global_logging(


@@ -55,10 +55,7 @@ use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, Sto
use datanode::datanode::{Datanode, DatanodeBuilder}; use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer; use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig; use file_engine::config::EngineConfig as FileEngineConfig;
use flow::{ use flow::{FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendInvoker};
FlowConfig, FlownodeBuilder, FlownodeInstance, FlownodeOptions, FrontendClient,
FrontendInvoker, GrpcQueryHandlerWithBoxedError, StreamingEngine,
};
use frontend::frontend::{Frontend, FrontendOptions}; use frontend::frontend::{Frontend, FrontendOptions};
use frontend::instance::builder::FrontendBuilder; use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{Instance as FeInstance, StandaloneDatanodeManager}; use frontend::instance::{Instance as FeInstance, StandaloneDatanodeManager};
@@ -77,10 +74,10 @@ use servers::http::HttpOptions;
use servers::tls::{TlsMode, TlsOption}; use servers::tls::{TlsMode, TlsOption};
use servers::Mode; use servers::Mode;
use snafu::ResultExt; use snafu::ResultExt;
use tokio::sync::RwLock; use tokio::sync::{broadcast, RwLock};
use tracing_appender::non_blocking::WorkerGuard; use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{Result, StartFlownodeSnafu}; use crate::error::Result;
use crate::options::{GlobalOptions, GreptimeOptions}; use crate::options::{GlobalOptions, GreptimeOptions};
use crate::{error, log_versions, App}; use crate::{error, log_versions, App};
@@ -247,7 +244,9 @@ impl StandaloneOptions {
pub struct Instance { pub struct Instance {
datanode: Datanode, datanode: Datanode,
frontend: Frontend, frontend: Frontend,
flownode: FlownodeInstance, // TODO(discord9): wrapped it in flownode instance instead
flow_worker_manager: Arc<FlowWorkerManager>,
flow_shutdown: broadcast::Sender<()>,
procedure_manager: ProcedureManagerRef, procedure_manager: ProcedureManagerRef,
wal_options_allocator: WalOptionsAllocatorRef, wal_options_allocator: WalOptionsAllocatorRef,
// Keep the logging guard to prevent the worker from being dropped. // Keep the logging guard to prevent the worker from being dropped.
@@ -289,7 +288,9 @@ impl App for Instance {
.await .await
.context(error::StartFrontendSnafu)?; .context(error::StartFrontendSnafu)?;
self.flownode.start().await.context(StartFlownodeSnafu)?; self.flow_worker_manager
.clone()
.run_background(Some(self.flow_shutdown.subscribe()));
Ok(()) Ok(())
} }
@@ -310,9 +311,14 @@ impl App for Instance {
.await .await
.context(error::ShutdownDatanodeSnafu)?; .context(error::ShutdownDatanodeSnafu)?;
self.flownode self.flow_shutdown
.shutdown() .send(())
.await .map_err(|_e| {
flow::error::InternalSnafu {
reason: "Failed to send shutdown signal to flow worker manager, all receiver end already closed".to_string(),
}
.build()
})
.context(error::ShutdownFlownodeSnafu)?; .context(error::ShutdownFlownodeSnafu)?;
info!("Datanode instance stopped."); info!("Datanode instance stopped.");
@@ -523,36 +529,32 @@ impl StartCommand {
flow: opts.flow.clone(), flow: opts.flow.clone(),
..Default::default() ..Default::default()
}; };
// for standalone not use grpc, but get a handler to frontend grpc client without
// actually make a connection
let (frontend_client, frontend_instance_handler) =
FrontendClient::from_empty_grpc_handler();
let flow_builder = FlownodeBuilder::new( let flow_builder = FlownodeBuilder::new(
flownode_options, flownode_options,
plugins.clone(), plugins.clone(),
table_metadata_manager.clone(), table_metadata_manager.clone(),
catalog_manager.clone(), catalog_manager.clone(),
flow_metadata_manager.clone(), flow_metadata_manager.clone(),
Arc::new(frontend_client.clone()),
); );
let flownode = flow_builder let flownode = Arc::new(
.build() flow_builder
.await .build()
.map_err(BoxedError::new) .await
.context(error::OtherSnafu)?; .map_err(BoxedError::new)
.context(error::OtherSnafu)?,
);
// set the ref to query for the local flow state // set the ref to query for the local flow state
{ {
let flow_streaming_engine = flownode.flow_engine().streaming_engine(); let flow_worker_manager = flownode.flow_worker_manager();
information_extension information_extension
.set_flow_streaming_engine(flow_streaming_engine) .set_flow_worker_manager(flow_worker_manager.clone())
.await; .await;
} }
let node_manager = Arc::new(StandaloneDatanodeManager { let node_manager = Arc::new(StandaloneDatanodeManager {
region_server: datanode.region_server(), region_server: datanode.region_server(),
flow_server: flownode.flow_engine(), flow_server: flownode.flow_worker_manager(),
}); });
let table_id_sequence = Arc::new( let table_id_sequence = Arc::new(
@@ -606,19 +608,10 @@ impl StartCommand {
.context(error::StartFrontendSnafu)?; .context(error::StartFrontendSnafu)?;
let fe_instance = Arc::new(fe_instance); let fe_instance = Arc::new(fe_instance);
// set the frontend client for flownode let flow_worker_manager = flownode.flow_worker_manager();
let grpc_handler = fe_instance.clone() as Arc<dyn GrpcQueryHandlerWithBoxedError>;
let weak_grpc_handler = Arc::downgrade(&grpc_handler);
frontend_instance_handler
.lock()
.unwrap()
.replace(weak_grpc_handler);
// set the frontend invoker for flownode
let flow_streaming_engine = flownode.flow_engine().streaming_engine();
// flow server need to be able to use frontend to write insert requests back // flow server need to be able to use frontend to write insert requests back
let invoker = FrontendInvoker::build_from( let invoker = FrontendInvoker::build_from(
flow_streaming_engine.clone(), flow_worker_manager.clone(),
catalog_manager.clone(), catalog_manager.clone(),
kv_backend.clone(), kv_backend.clone(),
layered_cache_registry.clone(), layered_cache_registry.clone(),
@@ -627,7 +620,9 @@ impl StartCommand {
) )
.await .await
.context(error::StartFlownodeSnafu)?; .context(error::StartFlownodeSnafu)?;
flow_streaming_engine.set_frontend_invoker(invoker).await; flow_worker_manager.set_frontend_invoker(invoker).await;
let (tx, _rx) = broadcast::channel(1);
let export_metrics_task = ExportMetricsTask::try_new(&opts.export_metrics, Some(&plugins)) let export_metrics_task = ExportMetricsTask::try_new(&opts.export_metrics, Some(&plugins))
.context(error::ServersSnafu)?; .context(error::ServersSnafu)?;
@@ -647,7 +642,8 @@ impl StartCommand {
Ok(Instance { Ok(Instance {
datanode, datanode,
frontend, frontend,
flownode, flow_worker_manager,
flow_shutdown: tx,
procedure_manager, procedure_manager,
wal_options_allocator, wal_options_allocator,
_guard: guard, _guard: guard,
@@ -703,7 +699,7 @@ pub struct StandaloneInformationExtension {
region_server: RegionServer, region_server: RegionServer,
procedure_manager: ProcedureManagerRef, procedure_manager: ProcedureManagerRef,
start_time_ms: u64, start_time_ms: u64,
flow_streaming_engine: RwLock<Option<Arc<StreamingEngine>>>, flow_worker_manager: RwLock<Option<Arc<FlowWorkerManager>>>,
} }
impl StandaloneInformationExtension { impl StandaloneInformationExtension {
@@ -712,14 +708,14 @@ impl StandaloneInformationExtension {
region_server, region_server,
procedure_manager, procedure_manager,
start_time_ms: common_time::util::current_time_millis() as u64, start_time_ms: common_time::util::current_time_millis() as u64,
flow_streaming_engine: RwLock::new(None), flow_worker_manager: RwLock::new(None),
} }
} }
/// Set the flow streaming engine for the standalone instance. /// Set the flow worker manager for the standalone instance.
pub async fn set_flow_streaming_engine(&self, flow_streaming_engine: Arc<StreamingEngine>) { pub async fn set_flow_worker_manager(&self, flow_worker_manager: Arc<FlowWorkerManager>) {
let mut guard = self.flow_streaming_engine.write().await; let mut guard = self.flow_worker_manager.write().await;
*guard = Some(flow_streaming_engine); *guard = Some(flow_worker_manager);
} }
} }
@@ -788,8 +784,6 @@ impl InformationExtension for StandaloneInformationExtension {
sst_size: region_stat.sst_size, sst_size: region_stat.sst_size,
index_size: region_stat.index_size, index_size: region_stat.index_size,
region_manifest: region_stat.manifest.into(), region_manifest: region_stat.manifest.into(),
data_topic_latest_entry_id: region_stat.data_topic_latest_entry_id,
metadata_topic_latest_entry_id: region_stat.metadata_topic_latest_entry_id,
} }
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
@@ -798,7 +792,7 @@ impl InformationExtension for StandaloneInformationExtension {
async fn flow_stats(&self) -> std::result::Result<Option<FlowStat>, Self::Error> { async fn flow_stats(&self) -> std::result::Result<Option<FlowStat>, Self::Error> {
Ok(Some( Ok(Some(
self.flow_streaming_engine self.flow_worker_manager
.read() .read()
.await .await
.as_ref() .as_ref()

src/cmd/tests/cli.rs (new file, 148 lines)

@@ -0,0 +1,148 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(target_os = "macos")]
mod tests {
use std::path::PathBuf;
use std::process::{Command, Stdio};
use std::time::Duration;
use common_test_util::temp_dir::create_temp_dir;
use rexpect::session::PtyReplSession;
struct Repl {
repl: PtyReplSession,
}
impl Repl {
fn send_line(&mut self, line: &str) {
let _ = self.repl.send_line(line).unwrap();
// read a line to consume the prompt
let _ = self.read_line();
}
fn read_line(&mut self) -> String {
self.repl.read_line().unwrap()
}
fn read_expect(&mut self, expect: &str) {
assert_eq!(self.read_line(), expect);
}
fn read_contains(&mut self, pat: &str) {
assert!(self.read_line().contains(pat));
}
}
// TODO(LFC): Un-ignore this REPL test.
// Ignore this REPL test because some logical plans like create database are not supported yet in Datanode.
#[ignore]
#[test]
fn test_repl() {
let data_home = create_temp_dir("data");
let wal_dir = create_temp_dir("wal");
let mut bin_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
bin_path.push("../../target/debug");
let bin_path = bin_path.to_str().unwrap();
let mut datanode = Command::new("./greptime")
.current_dir(bin_path)
.args([
"datanode",
"start",
"--rpc-bind-addr=0.0.0.0:4321",
"--node-id=1",
&format!("--data-home={}", data_home.path().display()),
&format!("--wal-dir={}", wal_dir.path().display()),
])
.stdout(Stdio::null())
.spawn()
.unwrap();
// wait for Datanode actually started
std::thread::sleep(Duration::from_secs(3));
let mut repl_cmd = Command::new("./greptime");
let _ = repl_cmd.current_dir(bin_path).args([
"--log-level=off",
"cli",
"attach",
"--grpc-bind-addr=0.0.0.0:4321",
// history commands can sneak into stdout and mess up our tests, so disable it
"--disable-helper",
]);
let pty_session = rexpect::session::spawn_command(repl_cmd, Some(5_000)).unwrap();
let repl = PtyReplSession {
prompt: "> ".to_string(),
pty_session,
quit_command: None,
echo_on: false,
};
let repl = &mut Repl { repl };
repl.read_expect("Ready for commands. (Hint: try 'help')");
test_create_database(repl);
test_use_database(repl);
test_create_table(repl);
test_insert(repl);
test_select(repl);
datanode.kill().unwrap();
let _ = datanode.wait().unwrap();
}
fn test_create_database(repl: &mut Repl) {
repl.send_line("CREATE DATABASE db;");
repl.read_expect("Affected Rows: 1");
repl.read_contains("Cost");
}
fn test_use_database(repl: &mut Repl) {
repl.send_line("USE db");
repl.read_expect("Total Rows: 0");
repl.read_contains("Cost");
repl.read_expect("Using db");
}
fn test_create_table(repl: &mut Repl) {
repl.send_line("CREATE TABLE t(x STRING, ts TIMESTAMP TIME INDEX);");
repl.read_expect("Affected Rows: 0");
repl.read_contains("Cost");
}
fn test_insert(repl: &mut Repl) {
repl.send_line("INSERT INTO t(x, ts) VALUES ('hello', 1676895812239);");
repl.read_expect("Affected Rows: 1");
repl.read_contains("Cost");
}
fn test_select(repl: &mut Repl) {
repl.send_line("SELECT * FROM t;");
repl.read_expect("+-------+-------------------------+");
repl.read_expect("| x | ts |");
repl.read_expect("+-------+-------------------------+");
repl.read_expect("| hello | 2023-02-20T12:23:32.239 |");
repl.read_expect("+-------+-------------------------+");
repl.read_expect("Total Rows: 1");
repl.read_contains("Cost");
}
}


@@ -74,7 +74,6 @@ fn test_load_datanode_example_config() {
RegionEngineConfig::File(FileEngineConfig {}), RegionEngineConfig::File(FileEngineConfig {}),
RegionEngineConfig::Metric(MetricEngineConfig { RegionEngineConfig::Metric(MetricEngineConfig {
experimental_sparse_primary_key_encoding: false, experimental_sparse_primary_key_encoding: false,
flush_metadata_region_interval: Duration::from_secs(30),
}), }),
], ],
logging: LoggingOptions { logging: LoggingOptions {
@@ -217,7 +216,6 @@ fn test_load_standalone_example_config() {
RegionEngineConfig::File(FileEngineConfig {}), RegionEngineConfig::File(FileEngineConfig {}),
RegionEngineConfig::Metric(MetricEngineConfig { RegionEngineConfig::Metric(MetricEngineConfig {
experimental_sparse_primary_key_encoding: false, experimental_sparse_primary_key_encoding: false,
flush_metadata_region_interval: Duration::from_secs(30),
}), }),
], ],
storage: StorageConfig { storage: StorageConfig {


@@ -31,8 +31,7 @@ impl Plugins {
} }
pub fn insert<T: 'static + Send + Sync>(&self, value: T) { pub fn insert<T: 'static + Send + Sync>(&self, value: T) {
let last = self.write().insert(value); let _ = self.write().insert(value);
assert!(last.is_none(), "each type of plugins must be one and only");
} }
pub fn get<T: 'static + Send + Sync + Clone>(&self) -> Option<T> { pub fn get<T: 'static + Send + Sync + Clone>(&self) -> Option<T> {
@@ -138,12 +137,4 @@ mod tests {
assert_eq!(plugins.len(), 2); assert_eq!(plugins.len(), 2);
assert!(!plugins.is_empty()); assert!(!plugins.is_empty());
} }
#[test]
#[should_panic(expected = "each type of plugins must be one and only")]
fn test_plugin_uniqueness() {
let plugins = Plugins::new();
plugins.insert(1i32);
plugins.insert(2i32);
}
} }
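
One side of this hunk keeps the `assert!` that enforces a single plugin instance per type, the other drops it together with `test_plugin_uniqueness`. A small sketch of the behavior without the assertion, mirroring the deleted test:

    // Without the uniqueness assertion a second insert of the same type no
    // longer panics; the later value silently replaces the earlier one.
    let plugins = Plugins::new();
    plugins.insert(1i32);
    plugins.insert(2i32);
    assert_eq!(plugins.get::<i32>(), Some(2));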


@@ -31,8 +31,7 @@ derive_builder.workspace = true
futures.workspace = true futures.workspace = true
lazy_static.workspace = true lazy_static.workspace = true
object-store.workspace = true object-store.workspace = true
object_store_opendal.workspace = true orc-rust = { version = "0.5", default-features = false, features = [
orc-rust = { git = "https://github.com/datafusion-contrib/orc-rust", rev = "3134cab581a8e91b942d6a23aca2916ea965f6bb", default-features = false, features = [
"async", "async",
] } ] }
parquet.workspace = true parquet.workspace = true


@@ -19,7 +19,6 @@ use std::str::FromStr;
use async_compression::tokio::bufread::{BzDecoder, GzipDecoder, XzDecoder, ZstdDecoder}; use async_compression::tokio::bufread::{BzDecoder, GzipDecoder, XzDecoder, ZstdDecoder};
use async_compression::tokio::write; use async_compression::tokio::write;
use bytes::Bytes; use bytes::Bytes;
use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
use futures::Stream; use futures::Stream;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use strum::EnumIter; use strum::EnumIter;
@@ -193,15 +192,3 @@ macro_rules! impl_compression_type {
} }
impl_compression_type!((Gzip, Gzip), (Bzip2, Bz), (Xz, Xz), (Zstd, Zstd)); impl_compression_type!((Gzip, Gzip), (Bzip2, Bz), (Xz, Xz), (Zstd, Zstd));
impl From<CompressionType> for FileCompressionType {
fn from(t: CompressionType) -> Self {
match t {
CompressionType::Gzip => FileCompressionType::GZIP,
CompressionType::Bzip2 => FileCompressionType::BZIP2,
CompressionType::Xz => FileCompressionType::XZ,
CompressionType::Zstd => FileCompressionType::ZSTD,
CompressionType::Uncompressed => FileCompressionType::UNCOMPRESSED,
}
}
}


@@ -14,23 +14,28 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc;
use arrow::csv; use arrow::csv;
use arrow::csv::reader::Format; use arrow::csv::reader::Format;
use arrow::record_batch::RecordBatch; use arrow::record_batch::RecordBatch;
use arrow_schema::Schema; use arrow_schema::{Schema, SchemaRef};
use async_trait::async_trait; use async_trait::async_trait;
use common_runtime; use common_runtime;
use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener};
use datafusion::error::Result as DataFusionResult;
use datafusion::physical_plan::SendableRecordBatchStream; use datafusion::physical_plan::SendableRecordBatchStream;
use derive_builder::Builder;
use object_store::ObjectStore; use object_store::ObjectStore;
use snafu::ResultExt; use snafu::ResultExt;
use tokio_util::compat::FuturesAsyncReadCompatExt; use tokio_util::compat::FuturesAsyncReadCompatExt;
use tokio_util::io::SyncIoBridge; use tokio_util::io::SyncIoBridge;
use super::stream_to_file;
use crate::buffered_writer::DfRecordBatchEncoder; use crate::buffered_writer::DfRecordBatchEncoder;
use crate::compression::CompressionType; use crate::compression::CompressionType;
use crate::error::{self, Result}; use crate::error::{self, Result};
use crate::file_format::{self, stream_to_file, FileFormat}; use crate::file_format::{self, open_with_decoder, FileFormat};
use crate::share_buffer::SharedBuffer; use crate::share_buffer::SharedBuffer;
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -95,6 +100,66 @@ impl Default for CsvFormat {
} }
} }
#[derive(Debug, Clone, Builder)]
pub struct CsvConfig {
batch_size: usize,
file_schema: SchemaRef,
#[builder(default = "None")]
file_projection: Option<Vec<usize>>,
#[builder(default = "true")]
has_header: bool,
#[builder(default = "b','")]
delimiter: u8,
}
impl CsvConfig {
fn builder(&self) -> csv::ReaderBuilder {
let mut builder = csv::ReaderBuilder::new(self.file_schema.clone())
.with_delimiter(self.delimiter)
.with_batch_size(self.batch_size)
.with_header(self.has_header);
if let Some(proj) = &self.file_projection {
builder = builder.with_projection(proj.clone());
}
builder
}
}
#[derive(Debug, Clone)]
pub struct CsvOpener {
config: Arc<CsvConfig>,
object_store: Arc<ObjectStore>,
compression_type: CompressionType,
}
impl CsvOpener {
/// Return a new [`CsvOpener`]. The caller must ensure that [`CsvConfig`].file_schema corresponds to the file being opened.
pub fn new(
config: CsvConfig,
object_store: ObjectStore,
compression_type: CompressionType,
) -> Self {
CsvOpener {
config: Arc::new(config),
object_store: Arc::new(object_store),
compression_type,
}
}
}
impl FileOpener for CsvOpener {
fn open(&self, meta: FileMeta) -> DataFusionResult<FileOpenFuture> {
open_with_decoder(
self.object_store.clone(),
meta.location().to_string(),
self.compression_type,
|| Ok(self.config.builder().build_decoder()),
)
}
}
#[async_trait] #[async_trait]
impl FileFormat for CsvFormat { impl FileFormat for CsvFormat {
async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> { async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> {
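
A brief usage sketch of the opener added above. Assumptions: `CsvConfigBuilder` is the builder that `derive_builder` generates for `CsvConfig` (it is imported by the tests later in this diff); `schema`, `store` and `scan_cfg` stand for a `SchemaRef`, an `ObjectStore` and a `FileScanConfig` supplied by the caller; and `FileStream::new` follows the signature of the DataFusion version pinned in this workspace:

    use datafusion::datasource::physical_plan::FileStream;
    use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
    use futures::TryStreamExt;

    // Build the reader configuration; unset fields fall back to the
    // `#[builder(default = ...)]` values declared on `CsvConfig`.
    let config = CsvConfigBuilder::default()
        .batch_size(8192)
        .file_schema(schema.clone())
        .build()
        .expect("required fields are set");
    let opener = CsvOpener::new(config, store, CompressionType::Uncompressed);

    // Drive the opener through DataFusion's FileStream to collect batches.
    let stream = FileStream::new(&scan_cfg, 0, opener, &ExecutionPlanMetricsSet::new())?;
    let batches: Vec<_> = stream.try_collect().await?;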


@@ -15,24 +15,29 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::io::BufReader; use std::io::BufReader;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc;
use arrow::json; use arrow::datatypes::SchemaRef;
use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter}; use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter};
use arrow::json::writer::LineDelimited; use arrow::json::writer::LineDelimited;
use arrow::json::{self, ReaderBuilder};
use arrow::record_batch::RecordBatch; use arrow::record_batch::RecordBatch;
use arrow_schema::Schema; use arrow_schema::Schema;
use async_trait::async_trait; use async_trait::async_trait;
use common_runtime; use common_runtime;
use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener};
use datafusion::error::{DataFusionError, Result as DataFusionResult};
use datafusion::physical_plan::SendableRecordBatchStream; use datafusion::physical_plan::SendableRecordBatchStream;
use object_store::ObjectStore; use object_store::ObjectStore;
use snafu::ResultExt; use snafu::ResultExt;
use tokio_util::compat::FuturesAsyncReadCompatExt; use tokio_util::compat::FuturesAsyncReadCompatExt;
use tokio_util::io::SyncIoBridge; use tokio_util::io::SyncIoBridge;
use super::stream_to_file;
use crate::buffered_writer::DfRecordBatchEncoder; use crate::buffered_writer::DfRecordBatchEncoder;
use crate::compression::CompressionType; use crate::compression::CompressionType;
use crate::error::{self, Result}; use crate::error::{self, Result};
use crate::file_format::{self, stream_to_file, FileFormat}; use crate::file_format::{self, open_with_decoder, FileFormat};
use crate::share_buffer::SharedBuffer; use crate::share_buffer::SharedBuffer;
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -109,6 +114,47 @@ impl FileFormat for JsonFormat {
} }
} }
#[derive(Debug, Clone)]
pub struct JsonOpener {
batch_size: usize,
projected_schema: SchemaRef,
object_store: Arc<ObjectStore>,
compression_type: CompressionType,
}
impl JsonOpener {
/// Return a new [`JsonOpener`]. Any fields not present in `projected_schema` will be ignored.
pub fn new(
batch_size: usize,
projected_schema: SchemaRef,
object_store: ObjectStore,
compression_type: CompressionType,
) -> Self {
Self {
batch_size,
projected_schema,
object_store: Arc::new(object_store),
compression_type,
}
}
}
impl FileOpener for JsonOpener {
fn open(&self, meta: FileMeta) -> DataFusionResult<FileOpenFuture> {
open_with_decoder(
self.object_store.clone(),
meta.location().to_string(),
self.compression_type,
|| {
ReaderBuilder::new(self.projected_schema.clone())
.with_batch_size(self.batch_size)
.build_decoder()
.map_err(DataFusionError::from)
},
)
}
}
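
Analogously, a brief sketch of constructing the `JsonOpener` defined above. The constructor arguments follow the definition in this hunk; the batch size and helper name are invented for illustration.

use arrow::datatypes::SchemaRef;
use object_store::ObjectStore;

use crate::compression::CompressionType;
use crate::file_format::json::JsonOpener;

// Illustrative helper: rows are decoded against `projected_schema`, and fields
// not present in that schema are ignored, as the doc comment above notes.
fn build_json_opener(projected_schema: SchemaRef, store: ObjectStore) -> JsonOpener {
    JsonOpener::new(8192, projected_schema, store, CompressionType::Uncompressed)
}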
pub async fn stream_to_json( pub async fn stream_to_json(
stream: SendableRecordBatchStream, stream: SendableRecordBatchStream,
store: ObjectStore, store: ObjectStore,


@@ -19,10 +19,7 @@ use std::vec;
use common_test_util::find_workspace_path; use common_test_util::find_workspace_path;
use datafusion::assert_batches_eq; use datafusion::assert_batches_eq;
use datafusion::datasource::file_format::file_compression_type::FileCompressionType; use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStream, ParquetExec};
use datafusion::datasource::physical_plan::{
CsvConfig, CsvOpener, FileOpener, FileScanConfig, FileStream, JsonOpener, ParquetExec,
};
use datafusion::execution::context::TaskContext; use datafusion::execution::context::TaskContext;
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::ExecutionPlan;
@@ -30,11 +27,14 @@ use datafusion::prelude::SessionContext;
use futures::StreamExt; use futures::StreamExt;
use super::FORMAT_TYPE; use super::FORMAT_TYPE;
use crate::compression::CompressionType;
use crate::error;
use crate::file_format::csv::{CsvConfigBuilder, CsvOpener};
use crate::file_format::json::JsonOpener;
use crate::file_format::orc::{OrcFormat, OrcOpener}; use crate::file_format::orc::{OrcFormat, OrcOpener};
use crate::file_format::parquet::DefaultParquetFileReaderFactory; use crate::file_format::parquet::DefaultParquetFileReaderFactory;
use crate::file_format::{FileFormat, Format}; use crate::file_format::{FileFormat, Format};
use crate::test_util::{scan_config, test_basic_schema, test_store}; use crate::test_util::{self, scan_config, test_basic_schema, test_store};
use crate::{error, test_util};
struct Test<'a, T: FileOpener> { struct Test<'a, T: FileOpener> {
config: FileScanConfig, config: FileScanConfig,
@@ -62,18 +62,15 @@ impl<T: FileOpener> Test<'_, T> {
#[tokio::test] #[tokio::test]
async fn test_json_opener() { async fn test_json_opener() {
let store = test_store("/"); let store = test_store("/");
let store = Arc::new(object_store_opendal::OpendalStore::new(store));
let schema = test_basic_schema(); let schema = test_basic_schema();
let json_opener = || { let json_opener = JsonOpener::new(
JsonOpener::new( 100,
test_util::TEST_BATCH_SIZE, schema.clone(),
schema.clone(), store.clone(),
FileCompressionType::UNCOMPRESSED, CompressionType::Uncompressed,
store.clone(), );
)
};
let path = &find_workspace_path("/src/common/datasource/tests/json/basic.json") let path = &find_workspace_path("/src/common/datasource/tests/json/basic.json")
.display() .display()
@@ -81,7 +78,7 @@ async fn test_json_opener() {
let tests = [ let tests = [
Test { Test {
config: scan_config(schema.clone(), None, path), config: scan_config(schema.clone(), None, path),
opener: json_opener(), opener: json_opener.clone(),
expected: vec![ expected: vec![
"+-----+-------+", "+-----+-------+",
"| num | str |", "| num | str |",
@@ -94,7 +91,7 @@ async fn test_json_opener() {
}, },
Test { Test {
config: scan_config(schema.clone(), Some(1), path), config: scan_config(schema.clone(), Some(1), path),
opener: json_opener(), opener: json_opener.clone(),
expected: vec![ expected: vec![
"+-----+------+", "+-----+------+",
"| num | str |", "| num | str |",
@@ -113,30 +110,23 @@ async fn test_json_opener() {
#[tokio::test] #[tokio::test]
async fn test_csv_opener() { async fn test_csv_opener() {
let store = test_store("/"); let store = test_store("/");
let store = Arc::new(object_store_opendal::OpendalStore::new(store));
let schema = test_basic_schema(); let schema = test_basic_schema();
let path = &find_workspace_path("/src/common/datasource/tests/csv/basic.csv") let path = &find_workspace_path("/src/common/datasource/tests/csv/basic.csv")
.display() .display()
.to_string(); .to_string();
let csv_config = Arc::new(CsvConfig::new( let csv_conf = CsvConfigBuilder::default()
test_util::TEST_BATCH_SIZE, .batch_size(test_util::TEST_BATCH_SIZE)
schema.clone(), .file_schema(schema.clone())
None, .build()
true, .unwrap();
b',',
b'"',
None,
store,
None,
));
let csv_opener = || CsvOpener::new(csv_config.clone(), FileCompressionType::UNCOMPRESSED); let csv_opener = CsvOpener::new(csv_conf, store, CompressionType::Uncompressed);
let tests = [ let tests = [
Test { Test {
config: scan_config(schema.clone(), None, path), config: scan_config(schema.clone(), None, path),
opener: csv_opener(), opener: csv_opener.clone(),
expected: vec![ expected: vec![
"+-----+-------+", "+-----+-------+",
"| num | str |", "| num | str |",
@@ -149,7 +139,7 @@ async fn test_csv_opener() {
}, },
Test { Test {
config: scan_config(schema.clone(), Some(1), path), config: scan_config(schema.clone(), Some(1), path),
opener: csv_opener(), opener: csv_opener.clone(),
expected: vec![ expected: vec![
"+-----+------+", "+-----+------+",
"| num | str |", "| num | str |",


@@ -16,19 +16,17 @@ use std::sync::Arc;
use arrow_schema::{DataType, Field, Schema, SchemaRef}; use arrow_schema::{DataType, Field, Schema, SchemaRef};
use common_test_util::temp_dir::{create_temp_dir, TempDir}; use common_test_util::temp_dir::{create_temp_dir, TempDir};
use datafusion::common::{Constraints, Statistics}; use datafusion::common::Statistics;
use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::listing::PartitionedFile;
use datafusion::datasource::object_store::ObjectStoreUrl; use datafusion::datasource::object_store::ObjectStoreUrl;
use datafusion::datasource::physical_plan::{ use datafusion::datasource::physical_plan::{FileScanConfig, FileStream};
CsvConfig, CsvOpener, FileScanConfig, FileStream, JsonOpener,
};
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
use object_store::services::Fs; use object_store::services::Fs;
use object_store::ObjectStore; use object_store::ObjectStore;
use crate::file_format::csv::stream_to_csv; use crate::compression::CompressionType;
use crate::file_format::json::stream_to_json; use crate::file_format::csv::{stream_to_csv, CsvConfigBuilder, CsvOpener};
use crate::file_format::json::{stream_to_json, JsonOpener};
use crate::test_util; use crate::test_util;
pub const TEST_BATCH_SIZE: usize = 100; pub const TEST_BATCH_SIZE: usize = 100;
@@ -76,7 +74,6 @@ pub fn scan_config(file_schema: SchemaRef, limit: Option<usize>, filename: &str)
object_store_url: ObjectStoreUrl::parse("empty://").unwrap(), // won't be used object_store_url: ObjectStoreUrl::parse("empty://").unwrap(), // won't be used
file_schema, file_schema,
file_groups: vec![vec![PartitionedFile::new(filename.to_string(), 10)]], file_groups: vec![vec![PartitionedFile::new(filename.to_string(), 10)]],
constraints: Constraints::empty(),
statistics, statistics,
projection: None, projection: None,
limit, limit,
@@ -93,8 +90,8 @@ pub async fn setup_stream_to_json_test(origin_path: &str, threshold: impl Fn(usi
let json_opener = JsonOpener::new( let json_opener = JsonOpener::new(
test_util::TEST_BATCH_SIZE, test_util::TEST_BATCH_SIZE,
schema.clone(), schema.clone(),
FileCompressionType::UNCOMPRESSED, store.clone(),
Arc::new(object_store_opendal::OpendalStore::new(store.clone())), CompressionType::Uncompressed,
); );
let size = store.read(origin_path).await.unwrap().len(); let size = store.read(origin_path).await.unwrap().len();
@@ -127,19 +124,13 @@ pub async fn setup_stream_to_csv_test(origin_path: &str, threshold: impl Fn(usiz
let schema = test_basic_schema(); let schema = test_basic_schema();
let csv_config = Arc::new(CsvConfig::new( let csv_conf = CsvConfigBuilder::default()
TEST_BATCH_SIZE, .batch_size(test_util::TEST_BATCH_SIZE)
schema.clone(), .file_schema(schema.clone())
None, .build()
true, .unwrap();
b',',
b'"',
None,
Arc::new(object_store_opendal::OpendalStore::new(store.clone())),
None,
));
let csv_opener = CsvOpener::new(csv_config, FileCompressionType::UNCOMPRESSED); let csv_opener = CsvOpener::new(csv_conf, store.clone(), CompressionType::Uncompressed);
let size = store.read(origin_path).await.unwrap().len(); let size = store.read(origin_path).await.unwrap().len();


@@ -12,6 +12,3 @@ http.workspace = true
snafu.workspace = true snafu.workspace = true
strum.workspace = true strum.workspace = true
tonic.workspace = true tonic.workspace = true
[dev-dependencies]
common-macro.workspace = true


@@ -42,7 +42,7 @@ pub trait ErrorExt: StackError {
if let Some(external_error) = error.source() { if let Some(external_error) = error.source() {
let external_root = external_error.sources().last().unwrap(); let external_root = external_error.sources().last().unwrap();
if error.transparent() { if error.to_string().is_empty() {
format!("{external_root}") format!("{external_root}")
} else { } else {
format!("{error}: {external_root}") format!("{error}: {external_root}")
@@ -86,14 +86,6 @@ pub trait StackError: std::error::Error {
} }
result result
} }
/// Indicates whether this error is "transparent", i.e., it delegates its "display" and "source"
/// to the underlying error. This can be useful when you are just wrapping some external error
/// **AND** cannot or will not provide meaningful contextual info. For example, the
/// `DataFusionError`.
fn transparent(&self) -> bool {
false
}
} }
impl<T: ?Sized + StackError> StackError for Arc<T> { impl<T: ?Sized + StackError> StackError for Arc<T> {


@@ -34,14 +34,12 @@ pub enum StatusCode {
Internal = 1003, Internal = 1003,
/// Invalid arguments. /// Invalid arguments.
InvalidArguments = 1004, InvalidArguments = 1004,
/// The task is cancelled (typically caller-side). /// The task is cancelled.
Cancelled = 1005, Cancelled = 1005,
/// Illegal state, can be exposed to users. /// Illegal state, can be exposed to users.
IllegalState = 1006, IllegalState = 1006,
/// Caused by some error originating from an external system. /// Caused by some error originating from an external system.
External = 1007, External = 1007,
/// The request is deadline exceeded (typically server-side).
DeadlineExceeded = 1008,
// ====== End of common status code ================ // ====== End of common status code ================
// ====== Begin of SQL related status code ========= // ====== Begin of SQL related status code =========
@@ -144,7 +142,6 @@ impl StatusCode {
| StatusCode::Unexpected | StatusCode::Unexpected
| StatusCode::InvalidArguments | StatusCode::InvalidArguments
| StatusCode::Cancelled | StatusCode::Cancelled
| StatusCode::DeadlineExceeded
| StatusCode::InvalidSyntax | StatusCode::InvalidSyntax
| StatusCode::DatabaseAlreadyExists | StatusCode::DatabaseAlreadyExists
| StatusCode::PlanQuery | StatusCode::PlanQuery
@@ -180,7 +177,6 @@ impl StatusCode {
| StatusCode::Unexpected | StatusCode::Unexpected
| StatusCode::Internal | StatusCode::Internal
| StatusCode::Cancelled | StatusCode::Cancelled
| StatusCode::DeadlineExceeded
| StatusCode::IllegalState | StatusCode::IllegalState
| StatusCode::EngineExecuteQuery | StatusCode::EngineExecuteQuery
| StatusCode::StorageUnavailable | StatusCode::StorageUnavailable
@@ -276,7 +272,6 @@ pub fn status_to_tonic_code(status_code: StatusCode) -> Code {
Code::InvalidArgument Code::InvalidArgument
} }
StatusCode::Cancelled => Code::Cancelled, StatusCode::Cancelled => Code::Cancelled,
StatusCode::DeadlineExceeded => Code::DeadlineExceeded,
StatusCode::TableAlreadyExists StatusCode::TableAlreadyExists
| StatusCode::TableColumnExists | StatusCode::TableColumnExists
| StatusCode::RegionAlreadyExists | StatusCode::RegionAlreadyExists
@@ -304,15 +299,6 @@ pub fn status_to_tonic_code(status_code: StatusCode) -> Code {
} }
} }
/// Converts tonic [Code] to [StatusCode].
pub fn convert_tonic_code_to_status_code(code: Code) -> StatusCode {
match code {
Code::Cancelled => StatusCode::Cancelled,
Code::DeadlineExceeded => StatusCode::DeadlineExceeded,
_ => StatusCode::Internal,
}
}
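
To make the mapping concrete, a hedged sketch of the intended round-trip behaviour, assuming the `DeadlineExceeded` variant and `convert_tonic_code_to_status_code` shown on one side of this hunk; the assertions simply mirror the match arms above and are not taken from the real test module.

use tonic::Code;

// Illustrative expectations; assumes this lives in the same module as the
// conversion functions above.
fn demo_status_code_mapping() {
    assert_eq!(status_to_tonic_code(StatusCode::Cancelled), Code::Cancelled);
    assert_eq!(
        status_to_tonic_code(StatusCode::DeadlineExceeded),
        Code::DeadlineExceeded
    );
    // Tonic codes without a dedicated mapping fall back to StatusCode::Internal.
    assert_eq!(
        convert_tonic_code_to_status_code(Code::Unknown),
        StatusCode::Internal
    );
}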
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use strum::IntoEnumIterator; use strum::IntoEnumIterator;


@@ -1,115 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use common_error::ext::{ErrorExt, PlainError, StackError};
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use snafu::{Location, ResultExt, Snafu};
#[derive(Snafu)]
#[stack_trace_debug]
enum MyError {
#[snafu(display(r#"A normal error with "display" attribute, message "{}""#, message))]
Normal {
message: String,
#[snafu(source)]
error: PlainError,
#[snafu(implicit)]
location: Location,
},
#[snafu(transparent)]
Transparent {
#[snafu(source)]
error: PlainError,
#[snafu(implicit)]
location: Location,
},
}
impl ErrorExt for MyError {
fn status_code(&self) -> StatusCode {
StatusCode::Unexpected
}
fn as_any(&self) -> &dyn Any {
self
}
}
fn normal_error() -> Result<(), MyError> {
let plain_error = PlainError::new("<root cause>".to_string(), StatusCode::Unexpected);
Err(plain_error).context(NormalSnafu { message: "blabla" })
}
fn transparent_error() -> Result<(), MyError> {
let plain_error = PlainError::new("<root cause>".to_string(), StatusCode::Unexpected);
Err(plain_error)?
}
#[test]
fn test_output_msg() {
let result = normal_error();
assert_eq!(
result.unwrap_err().output_msg(),
r#"A normal error with "display" attribute, message "blabla": <root cause>"#
);
let result = transparent_error();
assert_eq!(result.unwrap_err().output_msg(), "<root cause>");
}
#[test]
fn test_to_string() {
let result = normal_error();
assert_eq!(
result.unwrap_err().to_string(),
r#"A normal error with "display" attribute, message "blabla""#
);
let result = transparent_error();
assert_eq!(result.unwrap_err().to_string(), "<root cause>");
}
#[test]
fn test_debug_format() {
let result = normal_error();
let debug_output = format!("{:?}", result.unwrap_err());
let normalized_output = debug_output.replace('\\', "/");
assert_eq!(
normalized_output,
r#"0: A normal error with "display" attribute, message "blabla", at src/common/error/tests/ext.rs:55:22
1: PlainError { msg: "<root cause>", status_code: Unexpected }"#
);
let result = transparent_error();
let debug_output = format!("{:?}", result.unwrap_err());
let normalized_output = debug_output.replace('\\', "/");
assert_eq!(
normalized_output,
r#"0: <transparent>, at src/common/error/tests/ext.rs:60:5
1: PlainError { msg: "<root cause>", status_code: Unexpected }"#
);
}
#[test]
fn test_transparent_flag() {
let result = normal_error();
assert!(!result.unwrap_err().transparent());
let result = transparent_error();
assert!(result.unwrap_err().transparent());
}


@@ -8,7 +8,6 @@ license.workspace = true
workspace = true workspace = true
[features] [features]
testing = []
default = ["geo"] default = ["geo"]
geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"] geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"]
@@ -18,7 +17,6 @@ api.workspace = true
arc-swap = "1.0" arc-swap = "1.0"
async-trait.workspace = true async-trait.workspace = true
bincode = "1.3" bincode = "1.3"
catalog.workspace = true
chrono.workspace = true chrono.workspace = true
common-base.workspace = true common-base.workspace = true
common-catalog.workspace = true common-catalog.workspace = true


@@ -25,13 +25,12 @@ use session::context::QueryContextRef;
use crate::handlers::ProcedureServiceHandlerRef; use crate::handlers::ProcedureServiceHandlerRef;
use crate::helper::cast_u64; use crate::helper::cast_u64;
/// The default timeout for the migrate region procedure. const DEFAULT_TIMEOUT_SECS: u64 = 30;
const DEFAULT_TIMEOUT_SECS: u64 = 300;
/// A function to migrate a region from source peer to target peer. /// A function to migrate a region from source peer to target peer.
/// Returns the submitted procedure id if success. Only available in cluster mode. /// Returns the submitted procedure id if success. Only available in cluster mode.
/// ///
/// - `migrate_region(region_id, from_peer, to_peer)`, with timeout(300 seconds). /// - `migrate_region(region_id, from_peer, to_peer)`, with timeout(30 seconds).
/// - `migrate_region(region_id, from_peer, to_peer, timeout(secs))`. /// - `migrate_region(region_id, from_peer, to_peer, timeout(secs))`.
/// ///
/// The parameters: /// The parameters:


@@ -32,7 +32,7 @@ pub struct FunctionContext {
impl FunctionContext { impl FunctionContext {
/// Create a mock [`FunctionContext`] for test. /// Create a mock [`FunctionContext`] for test.
#[cfg(any(test, feature = "testing"))] #[cfg(test)]
pub fn mock() -> Self { pub fn mock() -> Self {
Self { Self {
query_ctx: QueryContextBuilder::default().build().into(), query_ctx: QueryContextBuilder::default().build().into(),


@@ -15,7 +15,6 @@
use std::sync::Arc; use std::sync::Arc;
use async_trait::async_trait; use async_trait::async_trait;
use catalog::CatalogManagerRef;
use common_base::AffectedRows; use common_base::AffectedRows;
use common_meta::rpc::procedure::{ use common_meta::rpc::procedure::{
AddRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse, AddRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse,
@@ -73,9 +72,6 @@ pub trait ProcedureServiceHandler: Send + Sync {
/// Remove a region follower from a region. /// Remove a region follower from a region.
async fn remove_region_follower(&self, request: RemoveRegionFollowerRequest) -> Result<()>; async fn remove_region_follower(&self, request: RemoveRegionFollowerRequest) -> Result<()>;
/// Get the catalog manager
fn catalog_manager(&self) -> &CatalogManagerRef;
} }
/// This flow service handler is only use for flush flow for now. /// This flow service handler is only use for flush flow for now.


@@ -27,7 +27,7 @@ use datatypes::value::{ListValue, Value};
use datatypes::vectors::VectorRef; use datatypes::vectors::VectorRef;
use snafu::{ensure, ResultExt}; use snafu::{ensure, ResultExt};
use crate::scalars::geo::helpers::{ensure_columns_len, ensure_columns_n}; use super::helpers::{ensure_columns_len, ensure_columns_n};
/// Accumulator of lat, lng, timestamp tuples /// Accumulator of lat, lng, timestamp tuples
#[derive(Debug)] #[derive(Debug)]


@@ -31,8 +31,8 @@ use h3o::{CellIndex, LatLng, Resolution};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use snafu::ResultExt; use snafu::ResultExt;
use super::helpers::{ensure_and_coerce, ensure_columns_len, ensure_columns_n};
use crate::function::{Function, FunctionContext}; use crate::function::{Function, FunctionContext};
use crate::scalars::geo::helpers::{ensure_and_coerce, ensure_columns_len, ensure_columns_n};
static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| { static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
vec![ vec![


@@ -26,9 +26,9 @@ use geo::{Area, Distance, Haversine};
use geo_types::Geometry; use geo_types::Geometry;
use snafu::ResultExt; use snafu::ResultExt;
use super::helpers::{ensure_columns_len, ensure_columns_n};
use super::wkt::parse_wkt;
use crate::function::{Function, FunctionContext}; use crate::function::{Function, FunctionContext};
use crate::scalars::geo::helpers::{ensure_columns_len, ensure_columns_n};
use crate::scalars::geo::wkt::parse_wkt;
/// Return the WGS84 (SRID: 4326) euclidean distance between two geometry objects, in degrees /// Return the WGS84 (SRID: 4326) euclidean distance between two geometry objects, in degrees
#[derive(Clone, Debug, Default, Display)] #[derive(Clone, Debug, Default, Display)]


@@ -23,9 +23,9 @@ use geo::algorithm::contains::Contains;
use geo::algorithm::intersects::Intersects; use geo::algorithm::intersects::Intersects;
use geo::algorithm::within::Within; use geo::algorithm::within::Within;
use super::helpers::{ensure_columns_len, ensure_columns_n};
use super::wkt::parse_wkt;
use crate::function::{Function, FunctionContext}; use crate::function::{Function, FunctionContext};
use crate::scalars::geo::helpers::{ensure_columns_len, ensure_columns_n};
use crate::scalars::geo::wkt::parse_wkt;
/// Test if spatial relationship: contains /// Test if spatial relationship: contains
#[derive(Clone, Debug, Default, Display)] #[derive(Clone, Debug, Default, Display)]


@@ -26,8 +26,8 @@ use once_cell::sync::Lazy;
use snafu::ResultExt; use snafu::ResultExt;
use wkt::{ToWkt, TryFromWkt}; use wkt::{ToWkt, TryFromWkt};
use super::helpers::{ensure_columns_len, ensure_columns_n};
use crate::function::{Function, FunctionContext}; use crate::function::{Function, FunctionContext};
use crate::scalars::geo::helpers::{ensure_columns_len, ensure_columns_n};
static COORDINATE_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| { static COORDINATE_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
vec![ vec![


@@ -115,13 +115,6 @@ impl Function for UddSketchCalcFunction {
} }
}; };
// Check if the sketch is empty; if so, push null.
// This avoids a panic when calling estimate_quantile on an empty sketch,
// which in practice happens when the input is all null.
if sketch.bucket_iter().count() == 0 {
builder.push_null();
continue;
}
// Compute the estimated quantile from the sketch // Compute the estimated quantile from the sketch
let result = sketch.estimate_quantile(perc); let result = sketch.estimate_quantile(perc);
builder.push(Some(result)); builder.push(Some(result));


@@ -163,7 +163,7 @@ mod tests {
]; ];
let args = ScalarFunctionArgs { let args = ScalarFunctionArgs {
args, args: &args,
number_rows: 4, number_rows: 4,
return_type: &ConcreteDataType::boolean_datatype().as_arrow_type(), return_type: &ConcreteDataType::boolean_datatype().as_arrow_type(),
}; };


@@ -28,13 +28,12 @@ pub struct FunctionState {
impl FunctionState { impl FunctionState {
/// Create a mock [`FunctionState`] for test. /// Create a mock [`FunctionState`] for test.
#[cfg(any(test, feature = "testing"))] #[cfg(test)]
pub fn mock() -> Self { pub fn mock() -> Self {
use std::sync::Arc; use std::sync::Arc;
use api::v1::meta::ProcedureStatus; use api::v1::meta::ProcedureStatus;
use async_trait::async_trait; use async_trait::async_trait;
use catalog::CatalogManagerRef;
use common_base::AffectedRows; use common_base::AffectedRows;
use common_meta::rpc::procedure::{ use common_meta::rpc::procedure::{
AddRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse, AddRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse,
@@ -81,10 +80,6 @@ impl FunctionState {
) -> Result<()> { ) -> Result<()> {
Ok(()) Ok(())
} }
fn catalog_manager(&self) -> &CatalogManagerRef {
unimplemented!()
}
} }
#[async_trait] #[async_trait]


@@ -23,11 +23,8 @@ flatbuffers = "24"
hyper.workspace = true hyper.workspace = true
lazy_static.workspace = true lazy_static.workspace = true
prost.workspace = true prost.workspace = true
serde.workspace = true
serde_json.workspace = true
snafu.workspace = true snafu.workspace = true
tokio.workspace = true tokio.workspace = true
tokio-util.workspace = true
tonic.workspace = true tonic.workspace = true
tower.workspace = true tower.workspace = true


@@ -22,7 +22,6 @@ use dashmap::mapref::entry::Entry;
use dashmap::DashMap; use dashmap::DashMap;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use tokio_util::sync::CancellationToken;
use tonic::transport::{ use tonic::transport::{
Certificate, Channel as InnerChannel, ClientTlsConfig, Endpoint, Identity, Uri, Certificate, Channel as InnerChannel, ClientTlsConfig, Endpoint, Identity, Uri,
}; };
@@ -40,48 +39,18 @@ lazy_static! {
static ref ID: AtomicU64 = AtomicU64::new(0); static ref ID: AtomicU64 = AtomicU64::new(0);
} }
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug)]
pub struct ChannelManager { pub struct ChannelManager {
inner: Arc<Inner>,
}
#[derive(Debug)]
struct Inner {
id: u64, id: u64,
config: ChannelConfig, config: ChannelConfig,
client_tls_config: Option<ClientTlsConfig>, client_tls_config: Option<ClientTlsConfig>,
pool: Arc<Pool>, pool: Arc<Pool>,
channel_recycle_started: AtomicBool, channel_recycle_started: Arc<AtomicBool>,
cancel: CancellationToken,
} }
impl Default for Inner { impl Default for ChannelManager {
fn default() -> Self { fn default() -> Self {
Self::with_config(ChannelConfig::default()) ChannelManager::with_config(ChannelConfig::default())
}
}
impl Drop for Inner {
fn drop(&mut self) {
// Cancel the channel recycle task.
self.cancel.cancel();
}
}
impl Inner {
fn with_config(config: ChannelConfig) -> Self {
let id = ID.fetch_add(1, Ordering::Relaxed);
let pool = Arc::new(Pool::default());
let cancel = CancellationToken::new();
Self {
id,
config,
client_tls_config: None,
pool,
channel_recycle_started: AtomicBool::new(false),
cancel,
}
} }
} }
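
The ownership restructuring in this file is easier to follow in isolation. Below is a minimal, self-contained sketch of the shape used on the `Arc<Inner>` side of this hunk: the shared state owns a `CancellationToken`, a background loop is spawned against a clone of it, and dropping the last handle cancels that loop. All names (`DemoInner`, `DemoManager`) are invented for the sketch, and it uses `tokio::spawn` directly instead of the crate's runtime helpers.

use std::sync::Arc;
use std::time::Duration;

use tokio_util::sync::CancellationToken;

struct DemoInner {
    cancel: CancellationToken,
}

impl Drop for DemoInner {
    fn drop(&mut self) {
        // Fires once the last Arc<DemoInner> is gone and stops the background task.
        self.cancel.cancel();
    }
}

#[derive(Clone)]
struct DemoManager {
    inner: Arc<DemoInner>,
}

impl DemoManager {
    // Must be called from within a Tokio runtime, since it spawns the loop.
    fn new() -> Self {
        let inner = Arc::new(DemoInner {
            cancel: CancellationToken::new(),
        });
        let cancel = inner.cancel.clone();
        tokio::spawn(async move {
            let mut interval = tokio::time::interval(Duration::from_secs(10));
            loop {
                tokio::select! {
                    _ = cancel.cancelled() => break,
                    _ = interval.tick() => {
                        // Periodic maintenance, e.g. recycling idle channels.
                    }
                }
            }
        });
        Self { inner }
    }
}

Cloning the manager stays cheap (a single `Arc`), and the maintenance task cannot outlive the last handle, which is what the `test_pool_release_*` tests further down assert.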
@@ -91,14 +60,19 @@ impl ChannelManager {
} }
pub fn with_config(config: ChannelConfig) -> Self { pub fn with_config(config: ChannelConfig) -> Self {
let inner = Inner::with_config(config); let id = ID.fetch_add(1, Ordering::Relaxed);
let pool = Arc::new(Pool::default());
Self { Self {
inner: Arc::new(inner), id,
config,
client_tls_config: None,
pool,
channel_recycle_started: Arc::new(AtomicBool::new(false)),
} }
} }
pub fn with_tls_config(config: ChannelConfig) -> Result<Self> { pub fn with_tls_config(config: ChannelConfig) -> Result<Self> {
let mut inner = Inner::with_config(config.clone()); let mut cm = Self::with_config(config.clone());
// setup tls // setup tls
let path_config = config.client_tls.context(InvalidTlsConfigSnafu { let path_config = config.client_tls.context(InvalidTlsConfigSnafu {
@@ -114,23 +88,17 @@ impl ChannelManager {
.context(InvalidConfigFilePathSnafu)?; .context(InvalidConfigFilePathSnafu)?;
let client_identity = Identity::from_pem(client_cert, client_key); let client_identity = Identity::from_pem(client_cert, client_key);
inner.client_tls_config = Some( cm.client_tls_config = Some(
ClientTlsConfig::new() ClientTlsConfig::new()
.ca_certificate(server_root_ca_cert) .ca_certificate(server_root_ca_cert)
.identity(client_identity), .identity(client_identity),
); );
Ok(Self { Ok(cm)
inner: Arc::new(inner),
})
} }
pub fn config(&self) -> &ChannelConfig { pub fn config(&self) -> &ChannelConfig {
&self.inner.config &self.config
}
fn pool(&self) -> &Arc<Pool> {
&self.inner.pool
} }
pub fn get(&self, addr: impl AsRef<str>) -> Result<InnerChannel> { pub fn get(&self, addr: impl AsRef<str>) -> Result<InnerChannel> {
@@ -138,12 +106,12 @@ impl ChannelManager {
let addr = addr.as_ref(); let addr = addr.as_ref();
// It will acquire the read lock. // It will acquire the read lock.
if let Some(inner_ch) = self.pool().get(addr) { if let Some(inner_ch) = self.pool.get(addr) {
return Ok(inner_ch); return Ok(inner_ch);
} }
// It will acquire the write lock. // It will acquire the write lock.
let entry = match self.pool().entry(addr.to_string()) { let entry = match self.pool.entry(addr.to_string()) {
Entry::Occupied(entry) => { Entry::Occupied(entry) => {
entry.get().increase_access(); entry.get().increase_access();
entry.into_ref() entry.into_ref()
@@ -182,7 +150,7 @@ impl ChannelManager {
access: AtomicUsize::new(1), access: AtomicUsize::new(1),
use_default_connector: false, use_default_connector: false,
}; };
self.pool().put(addr, channel); self.pool.put(addr, channel);
Ok(inner_channel) Ok(inner_channel)
} }
@@ -191,11 +159,11 @@ impl ChannelManager {
where where
F: FnMut(&String, &mut Channel) -> bool, F: FnMut(&String, &mut Channel) -> bool,
{ {
self.pool().retain_channel(f); self.pool.retain_channel(f);
} }
fn build_endpoint(&self, addr: &str) -> Result<Endpoint> { fn build_endpoint(&self, addr: &str) -> Result<Endpoint> {
let http_prefix = if self.inner.client_tls_config.is_some() { let http_prefix = if self.client_tls_config.is_some() {
"https" "https"
} else { } else {
"http" "http"
@@ -204,52 +172,51 @@ impl ChannelManager {
let mut endpoint = let mut endpoint =
Endpoint::new(format!("{http_prefix}://{addr}")).context(CreateChannelSnafu)?; Endpoint::new(format!("{http_prefix}://{addr}")).context(CreateChannelSnafu)?;
if let Some(dur) = self.config().timeout { if let Some(dur) = self.config.timeout {
endpoint = endpoint.timeout(dur); endpoint = endpoint.timeout(dur);
} }
if let Some(dur) = self.config().connect_timeout { if let Some(dur) = self.config.connect_timeout {
endpoint = endpoint.connect_timeout(dur); endpoint = endpoint.connect_timeout(dur);
} }
if let Some(limit) = self.config().concurrency_limit { if let Some(limit) = self.config.concurrency_limit {
endpoint = endpoint.concurrency_limit(limit); endpoint = endpoint.concurrency_limit(limit);
} }
if let Some((limit, dur)) = self.config().rate_limit { if let Some((limit, dur)) = self.config.rate_limit {
endpoint = endpoint.rate_limit(limit, dur); endpoint = endpoint.rate_limit(limit, dur);
} }
if let Some(size) = self.config().initial_stream_window_size { if let Some(size) = self.config.initial_stream_window_size {
endpoint = endpoint.initial_stream_window_size(size); endpoint = endpoint.initial_stream_window_size(size);
} }
if let Some(size) = self.config().initial_connection_window_size { if let Some(size) = self.config.initial_connection_window_size {
endpoint = endpoint.initial_connection_window_size(size); endpoint = endpoint.initial_connection_window_size(size);
} }
if let Some(dur) = self.config().http2_keep_alive_interval { if let Some(dur) = self.config.http2_keep_alive_interval {
endpoint = endpoint.http2_keep_alive_interval(dur); endpoint = endpoint.http2_keep_alive_interval(dur);
} }
if let Some(dur) = self.config().http2_keep_alive_timeout { if let Some(dur) = self.config.http2_keep_alive_timeout {
endpoint = endpoint.keep_alive_timeout(dur); endpoint = endpoint.keep_alive_timeout(dur);
} }
if let Some(enabled) = self.config().http2_keep_alive_while_idle { if let Some(enabled) = self.config.http2_keep_alive_while_idle {
endpoint = endpoint.keep_alive_while_idle(enabled); endpoint = endpoint.keep_alive_while_idle(enabled);
} }
if let Some(enabled) = self.config().http2_adaptive_window { if let Some(enabled) = self.config.http2_adaptive_window {
endpoint = endpoint.http2_adaptive_window(enabled); endpoint = endpoint.http2_adaptive_window(enabled);
} }
if let Some(tls_config) = &self.inner.client_tls_config { if let Some(tls_config) = &self.client_tls_config {
endpoint = endpoint endpoint = endpoint
.tls_config(tls_config.clone()) .tls_config(tls_config.clone())
.context(CreateChannelSnafu)?; .context(CreateChannelSnafu)?;
} }
endpoint = endpoint endpoint = endpoint
.tcp_keepalive(self.config().tcp_keepalive) .tcp_keepalive(self.config.tcp_keepalive)
.tcp_nodelay(self.config().tcp_nodelay); .tcp_nodelay(self.config.tcp_nodelay);
Ok(endpoint) Ok(endpoint)
} }
fn trigger_channel_recycling(&self) { fn trigger_channel_recycling(&self) {
if self if self
.inner
.channel_recycle_started .channel_recycle_started
.compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed) .compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed)
.is_err() .is_err()
@@ -257,15 +224,13 @@ impl ChannelManager {
return; return;
} }
let pool = self.pool().clone(); let pool = self.pool.clone();
let cancel = self.inner.cancel.clone(); let _handle = common_runtime::spawn_global(async {
let id = self.inner.id; recycle_channel_in_loop(pool, RECYCLE_CHANNEL_INTERVAL_SECS).await;
let _handle = common_runtime::spawn_global(async move {
recycle_channel_in_loop(pool, id, cancel, RECYCLE_CHANNEL_INTERVAL_SECS).await;
}); });
info!( info!(
"ChannelManager: {}, channel recycle is started, running in the background!", "ChannelManager: {}, channel recycle is started, running in the background!",
self.inner.id self.id
); );
} }
} }
@@ -478,23 +443,11 @@ impl Pool {
} }
} }
async fn recycle_channel_in_loop( async fn recycle_channel_in_loop(pool: Arc<Pool>, interval_secs: u64) {
pool: Arc<Pool>,
id: u64,
cancel: CancellationToken,
interval_secs: u64,
) {
let mut interval = tokio::time::interval(Duration::from_secs(interval_secs)); let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
loop { loop {
tokio::select! { let _ = interval.tick().await;
_ = cancel.cancelled() => {
info!("Stop channel recycle, ChannelManager id: {}", id);
break;
},
_ = interval.tick() => {}
}
pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0) pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
} }
} }
@@ -508,7 +461,11 @@ mod tests {
#[should_panic] #[should_panic]
#[test] #[test]
fn test_invalid_addr() { fn test_invalid_addr() {
let mgr = ChannelManager::default(); let pool = Arc::new(Pool::default());
let mgr = ChannelManager {
pool,
..Default::default()
};
let addr = "http://test"; let addr = "http://test";
let _ = mgr.get(addr).unwrap(); let _ = mgr.get(addr).unwrap();
@@ -518,9 +475,7 @@ mod tests {
async fn test_access_count() { async fn test_access_count() {
let mgr = ChannelManager::new(); let mgr = ChannelManager::new();
// Do not start recycle // Do not start recycle
mgr.inner mgr.channel_recycle_started.store(true, Ordering::Relaxed);
.channel_recycle_started
.store(true, Ordering::Relaxed);
let mgr = Arc::new(mgr); let mgr = Arc::new(mgr);
let addr = "test_uri"; let addr = "test_uri";
@@ -538,12 +493,12 @@ mod tests {
join.await.unwrap(); join.await.unwrap();
} }
assert_eq!(1000, mgr.pool().get_access(addr).unwrap()); assert_eq!(1000, mgr.pool.get_access(addr).unwrap());
mgr.pool() mgr.pool
.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0); .retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0);
assert_eq!(0, mgr.pool().get_access(addr).unwrap()); assert_eq!(0, mgr.pool.get_access(addr).unwrap());
} }
#[test] #[test]
@@ -669,49 +624,4 @@ mod tests {
true true
}); });
} }
#[tokio::test]
async fn test_pool_release_with_channel_recycle() {
let mgr = ChannelManager::new();
let pool_holder = mgr.pool().clone();
// start channel recycle task
let addr = "test_addr";
let _ = mgr.get(addr);
let mgr_clone_1 = mgr.clone();
let mgr_clone_2 = mgr.clone();
assert_eq!(3, Arc::strong_count(mgr.pool()));
drop(mgr_clone_1);
drop(mgr_clone_2);
assert_eq!(3, Arc::strong_count(mgr.pool()));
drop(mgr);
// wait for the channel recycle task to finish
tokio::time::sleep(Duration::from_millis(10)).await;
assert_eq!(1, Arc::strong_count(&pool_holder));
}
#[tokio::test]
async fn test_pool_release_without_channel_recycle() {
let mgr = ChannelManager::new();
let pool_holder = mgr.pool().clone();
let mgr_clone_1 = mgr.clone();
let mgr_clone_2 = mgr.clone();
assert_eq!(2, Arc::strong_count(mgr.pool()));
drop(mgr_clone_1);
drop(mgr_clone_2);
assert_eq!(2, Arc::strong_count(mgr.pool()));
drop(mgr);
assert_eq!(1, Arc::strong_count(&pool_holder));
}
} }


@@ -97,14 +97,6 @@ pub enum Error {
#[snafu(display("Not supported: {}", feat))] #[snafu(display("Not supported: {}", feat))]
NotSupported { feat: String }, NotSupported { feat: String },
#[snafu(display("Failed to serde Json"))]
SerdeJson {
#[snafu(source)]
error: serde_json::error::Error,
#[snafu(implicit)]
location: Location,
},
} }
impl ErrorExt for Error { impl ErrorExt for Error {
@@ -118,8 +110,7 @@ impl ErrorExt for Error {
Error::CreateChannel { .. } Error::CreateChannel { .. }
| Error::Conversion { .. } | Error::Conversion { .. }
| Error::DecodeFlightData { .. } | Error::DecodeFlightData { .. } => StatusCode::Internal,
| Error::SerdeJson { .. } => StatusCode::Internal,
Error::CreateRecordBatch { source, .. } => source.status_code(), Error::CreateRecordBatch { source, .. } => source.status_code(),
Error::ConvertArrowSchema { source, .. } => source.status_code(), Error::ConvertArrowSchema { source, .. } => source.status_code(),


@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
pub mod do_put;
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;

Some files were not shown because too many files have changed in this diff.