Compare commits

..

14 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Ruihang Xia | f3a02effa7 | assign partition_ranges<br/>Signed-off-by: Ruihang Xia <waynestxia@gmail.com> | 2025-04-08 21:36:29 +08:00 |
| Ruihang Xia | 52f9fc25ba | Revert "feat: keep parallelize_scan unchanged"<br/>This reverts commit 96ba00d175. | 2025-04-08 21:16:09 +08:00 |
| evenyag | 214a16565a | chore: update comment | 2025-04-08 21:00:25 +08:00 |
| evenyag | 21790a607e | feat: use smallvec | 2025-04-08 20:54:34 +08:00 |
| evenyag | b33d8c1bad | fix: include build merge reader cost to scan cost | 2025-04-08 20:52:19 +08:00 |
| evenyag | 916e1c2d9e | fix: address compiler errors | 2025-04-08 20:51:55 +08:00 |
| evenyag | 96ba00d175 | feat: keep parallelize_scan unchanged | 2025-04-08 20:36:48 +08:00 |
| evenyag | 7173401732 | fix: use series scan in PerSeries distribution | 2025-04-08 20:34:08 +08:00 |
| evenyag | 17c797a6d0 | refactor: remove per series scan from SeqScan | 2025-04-08 20:34:06 +08:00 |
| evenyag | c44ba1aa69 | feat: parallelize PerSeries | 2025-04-08 20:26:50 +08:00 |
| evenyag | 843d33f9d0 | feat: use series scan when distribution is PerSeries | 2025-04-08 20:26:50 +08:00 |
| evenyag | b74e2a7d9b | feat: implement scan logic of each partition | 2025-04-08 20:26:47 +08:00 |
| evenyag | 4a79c1527d | chore: add to scanner enum | 2025-04-08 20:24:54 +08:00 |
| evenyag | b7a6ff9cc3 | chore: basic methods for SeriesScan | 2025-04-08 20:24:54 +08:00 |
576 changed files with 17463 additions and 36427 deletions

.coderabbit.yaml (new file, 15 lines added)

@@ -0,0 +1,15 @@
# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
language: "en-US"
early_access: false
reviews:
profile: "chill"
request_changes_workflow: false
high_level_summary: true
poem: true
review_status: true
collapse_walkthrough: false
auto_review:
enabled: false
drafts: false
chat:
auto_reply: true


@@ -2,14 +2,13 @@ meta:
configData: |- configData: |-
[runtime] [runtime]
global_rt_size = 4 global_rt_size = 4
[wal] [wal]
provider = "kafka" provider = "kafka"
broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"] broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
num_topics = 3 num_topics = 3
auto_prune_interval = "30s"
trigger_flush_threshold = 100
[datanode] [datanode]
[datanode.client] [datanode.client]
timeout = "120s" timeout = "120s"
@@ -23,7 +22,6 @@ datanode:
provider = "kafka" provider = "kafka"
broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"] broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
linger = "2ms" linger = "2ms"
overwrite_entry_start_id = true
frontend: frontend:
configData: |- configData: |-
[runtime] [runtime]


@@ -25,7 +25,7 @@ function create_version() {
fi fi
# Reuse $NEXT_RELEASE_VERSION to identify whether it's a nightly build. # Reuse $NEXT_RELEASE_VERSION to identify whether it's a nightly build.
# It will be like 'nightly-20230808-7d0d8dc6'. # It will be like 'nigtly-20230808-7d0d8dc6'.
if [ "$NEXT_RELEASE_VERSION" = nightly ]; then if [ "$NEXT_RELEASE_VERSION" = nightly ]; then
echo "$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")-$(git rev-parse --short HEAD)" echo "$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")-$(git rev-parse --short HEAD)"
exit 0 exit 0
@@ -60,9 +60,9 @@ function create_version() {
} }
# You can run as following examples: # You can run as following examples:
# GITHUB_EVENT_NAME=push NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly GITHUB_REF_NAME=v0.3.0 ./create-version.sh # GITHUB_EVENT_NAME=push NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly GITHUB_REF_NAME=v0.3.0 ./create-version.sh
# GITHUB_EVENT_NAME=workflow_dispatch NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh # GITHUB_EVENT_NAME=workflow_dispatch NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
# GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh # GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
# GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=nightly NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh # GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=nightly NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
# GITHUB_EVENT_NAME=workflow_dispatch COMMIT_SHA=f0e7216c4bb6acce9b29a21ec2d683be2e3f984a NEXT_RELEASE_VERSION=dev NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh # GITHUB_EVENT_NAME=workflow_dispatch COMMIT_SHA=f0e7216c4bb6acce9b29a21ec2d683be2e3f984a NEXT_RELEASE_VERSION=dev NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
create_version create_version


@@ -21,6 +21,32 @@ jobs:
run: sudo apt-get install -y jq run: sudo apt-get install -y jq
# Make the check.sh script executable # Make the check.sh script executable
- name: Check grafana dashboards - name: Make check.sh executable
run: chmod +x grafana/check.sh
# Run the check.sh script
- name: Run check.sh
run: ./grafana/check.sh
# Only run summary.sh for pull_request events (not for merge queues or final pushes)
- name: Check if this is a pull request
id: check-pr
run: | run: |
make check-dashboards if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "is_pull_request=true" >> $GITHUB_OUTPUT
else
echo "is_pull_request=false" >> $GITHUB_OUTPUT
fi
# Make the summary.sh script executable
- name: Make summary.sh executable
if: steps.check-pr.outputs.is_pull_request == 'true'
run: chmod +x grafana/summary.sh
# Run the summary.sh script and add its output to the GitHub Job Summary
- name: Run summary.sh and add to Job Summary
if: steps.check-pr.outputs.is_pull_request == 'true'
run: |
SUMMARY=$(./grafana/summary.sh)
echo "### Summary of Grafana Panels" >> $GITHUB_STEP_SUMMARY
echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY


@@ -317,7 +317,7 @@ jobs:
image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }} image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }} image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }} push-latest-tag: true
- name: Set build image result - name: Set build image result
id: set-build-image-result id: set-build-image-result
@@ -364,7 +364,7 @@ jobs:
dev-mode: false dev-mode: false
upload-to-s3: true upload-to-s3: true
update-version-info: true update-version-info: true
push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }} push-latest-tag: true
publish-github-release: publish-github-release:
name: Create GitHub release and upload artifacts name: Create GitHub release and upload artifacts

Cargo.lock (generated, 1373 changed lines)

File diff suppressed because it is too large.


@@ -77,6 +77,7 @@ clippy.print_stdout = "warn"
clippy.print_stderr = "warn" clippy.print_stderr = "warn"
clippy.dbg_macro = "warn" clippy.dbg_macro = "warn"
clippy.implicit_clone = "warn" clippy.implicit_clone = "warn"
clippy.readonly_write_lock = "allow"
rust.unknown_lints = "deny" rust.unknown_lints = "deny"
rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] } rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
@@ -89,11 +90,11 @@ rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
# See for more detaiils: https://github.com/rust-lang/cargo/issues/11329 # See for more detaiils: https://github.com/rust-lang/cargo/issues/11329
ahash = { version = "0.8", features = ["compile-time-rng"] } ahash = { version = "0.8", features = ["compile-time-rng"] }
aquamarine = "0.6" aquamarine = "0.6"
arrow = { version = "54.2", features = ["prettyprint"] } arrow = { version = "53.0.0", features = ["prettyprint"] }
arrow-array = { version = "54.2", default-features = false, features = ["chrono-tz"] } arrow-array = { version = "53.0.0", default-features = false, features = ["chrono-tz"] }
arrow-flight = "54.2" arrow-flight = "53.0"
arrow-ipc = { version = "54.2", default-features = false, features = ["lz4", "zstd"] } arrow-ipc = { version = "53.0.0", default-features = false, features = ["lz4", "zstd"] }
arrow-schema = { version = "54.2", features = ["serde"] } arrow-schema = { version = "53.0", features = ["serde"] }
async-stream = "0.3" async-stream = "0.3"
async-trait = "0.1" async-trait = "0.1"
# Remember to update axum-extra, axum-macros when updating axum # Remember to update axum-extra, axum-macros when updating axum
@@ -112,15 +113,15 @@ clap = { version = "4.4", features = ["derive"] }
config = "0.13.0" config = "0.13.0"
crossbeam-utils = "0.8" crossbeam-utils = "0.8"
dashmap = "6.1" dashmap = "6.1"
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-optimizer = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-sql = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
deadpool = "0.12" deadpool = "0.12"
deadpool-postgres = "0.14" deadpool-postgres = "0.14"
derive_builder = "0.20" derive_builder = "0.20"
@@ -129,7 +130,7 @@ etcd-client = "0.14"
fst = "0.4.7" fst = "0.4.7"
futures = "0.3" futures = "0.3"
futures-util = "0.3" futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "e82b0158cd38d4021edb4e4c0ae77f999051e62f" } greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "dd4a1996982534636734674db66e44464b0c0d83" }
hex = "0.4" hex = "0.4"
http = "1" http = "1"
humantime = "2.1" humantime = "2.1"
@@ -147,7 +148,6 @@ moka = "0.12"
nalgebra = "0.33" nalgebra = "0.33"
notify = "8.0" notify = "8.0"
num_cpus = "1.16" num_cpus = "1.16"
object_store_opendal = "0.50"
once_cell = "1.18" once_cell = "1.18"
opentelemetry-proto = { version = "0.27", features = [ opentelemetry-proto = { version = "0.27", features = [
"gen-tonic", "gen-tonic",
@@ -157,11 +157,11 @@ opentelemetry-proto = { version = "0.27", features = [
"logs", "logs",
] } ] }
parking_lot = "0.12" parking_lot = "0.12"
parquet = { version = "54.2", default-features = false, features = ["arrow", "async", "object_store"] } parquet = { version = "53.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0" paste = "1.0"
pin-project = "1.0" pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] } prometheus = { version = "0.13.3", features = ["process"] }
promql-parser = { version = "0.5.1", features = ["ser"] } promql-parser = { version = "0.5", features = ["ser"] }
prost = "0.13" prost = "0.13"
raft-engine = { version = "0.4.1", default-features = false } raft-engine = { version = "0.4.1", default-features = false }
rand = "0.9" rand = "0.9"
@@ -191,18 +191,19 @@ simd-json = "0.15"
similar-asserts = "1.6.0" similar-asserts = "1.6.0"
smallvec = { version = "1", features = ["serde"] } smallvec = { version = "1", features = ["serde"] }
snafu = "0.8" snafu = "0.8"
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "0cf6c04490d59435ee965edd2078e8855bd8471e", features = [
"visitor",
"serde",
] } # branch = "v0.54.x"
sqlx = { version = "0.8", features = [ sqlx = { version = "0.8", features = [
"runtime-tokio-rustls", "runtime-tokio-rustls",
"mysql", "mysql",
"postgres", "postgres",
"chrono", "chrono",
] } ] }
strum = { version = "0.27", features = ["derive"] }
sysinfo = "0.33" sysinfo = "0.33"
# on branch v0.52.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "71dd86058d2af97b9925093d40c4e03360403170", features = [
"visitor",
"serde",
] } # on branch v0.44.x
strum = { version = "0.27", features = ["derive"] }
tempfile = "3" tempfile = "3"
tokio = { version = "1.40", features = ["full"] } tokio = { version = "1.40", features = ["full"] }
tokio-postgres = "0.7" tokio-postgres = "0.7"
@@ -269,9 +270,6 @@ metric-engine = { path = "src/metric-engine" }
mito2 = { path = "src/mito2" } mito2 = { path = "src/mito2" }
object-store = { path = "src/object-store" } object-store = { path = "src/object-store" }
operator = { path = "src/operator" } operator = { path = "src/operator" }
otel-arrow-rust = { git = "https://github.com/open-telemetry/otel-arrow", rev = "5d551412d2a12e689cde4d84c14ef29e36784e51", features = [
"server",
] }
partition = { path = "src/partition" } partition = { path = "src/partition" }
pipeline = { path = "src/pipeline" } pipeline = { path = "src/pipeline" }
plugins = { path = "src/plugins" } plugins = { path = "src/plugins" }


@@ -32,10 +32,6 @@ ifneq ($(strip $(BUILD_JOBS)),)
NEXTEST_OPTS += --build-jobs=${BUILD_JOBS} NEXTEST_OPTS += --build-jobs=${BUILD_JOBS}
endif endif
ifneq ($(strip $(BUILD_JOBS)),)
SQLNESS_OPTS += --jobs ${BUILD_JOBS}
endif
ifneq ($(strip $(CARGO_PROFILE)),) ifneq ($(strip $(CARGO_PROFILE)),)
CARGO_BUILD_OPTS += --profile ${CARGO_PROFILE} CARGO_BUILD_OPTS += --profile ${CARGO_PROFILE}
endif endif
@@ -197,7 +193,6 @@ fix-clippy: ## Fix clippy violations.
fmt-check: ## Check code format. fmt-check: ## Check code format.
cargo fmt --all -- --check cargo fmt --all -- --check
python3 scripts/check-snafu.py python3 scripts/check-snafu.py
python3 scripts/check-super-imports.py
.PHONY: start-etcd .PHONY: start-etcd
start-etcd: ## Start single node etcd for testing purpose. start-etcd: ## Start single node etcd for testing purpose.
@@ -222,16 +217,6 @@ start-cluster: ## Start the greptimedb cluster with etcd by using docker compose
stop-cluster: ## Stop the greptimedb cluster that created by docker compose. stop-cluster: ## Stop the greptimedb cluster that created by docker compose.
docker compose -f ./docker/docker-compose/cluster-with-etcd.yaml stop docker compose -f ./docker/docker-compose/cluster-with-etcd.yaml stop
##@ Grafana
.PHONY: check-dashboards
check-dashboards: ## Check the Grafana dashboards.
@./grafana/scripts/check.sh
.PHONY: dashboards
dashboards: ## Generate the Grafana dashboards for standalone mode and intermediate dashboards.
@./grafana/scripts/gen-dashboards.sh
##@ Docs ##@ Docs
config-docs: ## Generate configuration documentation from toml files. config-docs: ## Generate configuration documentation from toml files.
docker run --rm \ docker run --rm \


@@ -6,7 +6,7 @@
</picture> </picture>
</p> </p>
<h2 align="center">Real-Time & Cloud-Native Observability Database<br/>for metrics, logs, and traces</h2> <h2 align="center">Unified & Cost-Effective Observerability Database for Metrics, Logs, and Events</h2>
<div align="center"> <div align="center">
<h3 align="center"> <h3 align="center">
@@ -62,7 +62,7 @@
## Introduction ## Introduction
**GreptimeDB** is an open-source, cloud-native, unified & cost-effective observability database for **Metrics**, **Logs**, and **Traces**. You can gain real-time insights from Edge to Cloud at Any Scale. **GreptimeDB** is an open-source unified & cost-effective observerability database for **Metrics**, **Logs**, and **Events** (also **Traces** in plan). You can gain real-time insights from Edge to Cloud at Any Scale.
## News ## News
@@ -70,27 +70,27 @@
## Why GreptimeDB ## Why GreptimeDB
Our core developers have been building observability data platforms for years. Based on our best practices, GreptimeDB was born to give you: Our core developers have been building observerability data platforms for years. Based on our best practices, GreptimeDB was born to give you:
* **Unified Processing of Observability Data** * **Unified Processing of Metrics, Logs, and Events**
A unified database that treats metrics, logs, and traces as timestamped wide events with context, supporting [SQL](https://docs.greptime.com/user-guide/query-data/sql)/[PromQL](https://docs.greptime.com/user-guide/query-data/promql) queries and [stream processing](https://docs.greptime.com/user-guide/flow-computation/overview) to simplify complex data stacks. GreptimeDB unifies observerability data processing by treating all data - whether metrics, logs, or events - as timestamped events with context. Users can analyze this data using either [SQL](https://docs.greptime.com/user-guide/query-data/sql) or [PromQL](https://docs.greptime.com/user-guide/query-data/promql) and leverage stream processing ([Flow](https://docs.greptime.com/user-guide/flow-computation/overview)) to enable continuous aggregation. [Read more](https://docs.greptime.com/user-guide/concepts/data-model).
* **High Performance and Cost-effective**
Written in Rust, combines a distributed query engine with [rich indexing](https://docs.greptime.com/user-guide/manage-data/data-index) (inverted, fulltext, skip data, and vector) and optimized columnar storage to deliver sub-second responses on petabyte-scale data and high-cost efficiency.
* **Cloud-native Distributed Database** * **Cloud-native Distributed Database**
Built for [Kubernetes](https://docs.greptime.com/user-guide/deployments/deploy-on-kubernetes/greptimedb-operator-management). GreptimeDB achieves seamless scalability with its [cloud-native architecture](https://docs.greptime.com/user-guide/concepts/architecture) of separated compute and storage, built on object storage (AWS S3, Azure Blob Storage, etc.) while enabling cross-cloud deployment through a unified data access layer. Built for [Kubernetes](https://docs.greptime.com/user-guide/deployments/deploy-on-kubernetes/greptimedb-operator-management). GreptimeDB achieves seamless scalability with its [cloud-native architecture](https://docs.greptime.com/user-guide/concepts/architecture) of separated compute and storage, built on object storage (AWS S3, Azure Blob Storage, etc.) while enabling cross-cloud deployment through a unified data access layer.
* **Developer-Friendly** * **Performance and Cost-effective**
Access standardized SQL/PromQL interfaces through built-in web dashboard, REST API, and MySQL/PostgreSQL protocols. Supports widely adopted data ingestion [protocols](https://docs.greptime.com/user-guide/protocols/overview) for seamless migration and integration. Written in pure Rust for superior performance and reliability. GreptimeDB features a distributed query engine with intelligent indexing to handle high cardinality data efficiently. Its optimized columnar storage achieves 50x cost efficiency on cloud object storage through advanced compression. [Benchmark reports](https://www.greptime.com/blogs/2024-09-09-report-summary).
* **Flexible Deployment Options** * **Cloud-Edge Collaboration**
Deploy GreptimeDB anywhere from ARM-based edge devices to cloud environments with unified APIs and bandwidth-efficient data synchronization. Query edge and cloud data seamlessly through identical APIs. [Learn how to run on Android](https://docs.greptime.com/user-guide/deployments/run-on-android/). GreptimeDB seamlessly operates across cloud and edge (ARM/Android/Linux), providing consistent APIs and control plane for unified data management and efficient synchronization. [Learn how to run on Android](https://docs.greptime.com/user-guide/deployments/run-on-android/).
* **Multi-protocol Ingestion, SQL & PromQL Ready**
Widely adopted database protocols and APIs, including MySQL, PostgreSQL, InfluxDB, OpenTelemetry, Loki and Prometheus, etc. Effortless Adoption & Seamless Migration. [Supported Protocols Overview](https://docs.greptime.com/user-guide/protocols/overview).
For more detailed info please read [Why GreptimeDB](https://docs.greptime.com/user-guide/concepts/why-greptimedb). For more detailed info please read [Why GreptimeDB](https://docs.greptime.com/user-guide/concepts/why-greptimedb).
@@ -233,5 +233,3 @@ Special thanks to all the contributors who have propelled GreptimeDB forward. Fo
- GreptimeDB's query engine is powered by [Apache Arrow DataFusion™](https://arrow.apache.org/datafusion/). - GreptimeDB's query engine is powered by [Apache Arrow DataFusion™](https://arrow.apache.org/datafusion/).
- [Apache OpenDAL™](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer. - [Apache OpenDAL™](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer.
- GreptimeDB's meta service is based on [etcd](https://etcd.io/). - GreptimeDB's meta service is based on [etcd](https://etcd.io/).
<img alt="Known Users" src="https://greptime.com/logo/img/users.png"/>


@@ -96,8 +96,6 @@
| `procedure.max_running_procedures` | Integer | `128` | Max running procedures.<br/>The maximum number of procedures that can be running at the same time.<br/>If the number of running procedures exceeds this limit, the procedure will be rejected. | | `procedure.max_running_procedures` | Integer | `128` | Max running procedures.<br/>The maximum number of procedures that can be running at the same time.<br/>If the number of running procedures exceeds this limit, the procedure will be rejected. |
| `flow` | -- | -- | flow engine options. | | `flow` | -- | -- | flow engine options. |
| `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. | | `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. |
| `query` | -- | -- | The query engine options. |
| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
| `storage` | -- | -- | The data storage options. | | `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `./greptimedb_data/` | The working home directory. | | `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. | | `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
@@ -272,8 +270,6 @@
| `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. | | `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. |
| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. | | `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. |
| `meta_client.metadata_cache_tti` | String | `5m` | -- | | `meta_client.metadata_cache_tti` | String | `5m` | -- |
| `query` | -- | -- | The query engine options. |
| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
| `datanode` | -- | -- | Datanode options. | | `datanode` | -- | -- | Datanode options. |
| `datanode.client` | -- | -- | Datanode client options. | | `datanode.client` | -- | -- | Datanode client options. |
| `datanode.client.connect_timeout` | String | `10s` | -- | | `datanode.client.connect_timeout` | String | `10s` | -- |
@@ -319,7 +315,6 @@
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". | | `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. | | `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). | | `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
| `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.<br/>**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
| `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. | | `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. | | `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
| `runtime` | -- | -- | The runtime options. | | `runtime` | -- | -- | The runtime options. |
@@ -344,9 +339,6 @@
| `wal.provider` | String | `raft_engine` | -- | | `wal.provider` | String | `raft_engine` | -- |
| `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. | | `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. |
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` | | `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
| `wal.auto_prune_interval` | String | `0s` | Interval of automatically WAL pruning.<br/>Set to `0s` to disable automatically WAL pruning which delete unused remote WAL entries periodically. |
| `wal.trigger_flush_threshold` | Integer | `0` | The threshold to trigger a flush operation of a region in automatically WAL pruning.<br/>Metasrv will send a flush request to flush the region when:<br/>`trigger_flush_threshold` + `prunable_entry_id` < `max_prunable_entry_id`<br/>where:<br/>- `prunable_entry_id` is the maximum entry id that can be pruned of the region.<br/>- `max_prunable_entry_id` is the maximum prunable entry id among all regions in the same topic.<br/>Set to `0` to disable the flush operation. |
| `wal.auto_prune_parallelism` | Integer | `10` | Concurrent task limit for automatically WAL pruning. |
| `wal.num_topics` | Integer | `64` | Number of topics. | | `wal.num_topics` | Integer | `64` | Number of topics. |
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) | | `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>Only accepts strings that match the following regular expression pattern:<br/>[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*<br/>i.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1. | | `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>Only accepts strings that match the following regular expression pattern:<br/>[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*<br/>i.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1. |
@@ -437,8 +429,6 @@
| `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. | | `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
| `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. | | `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. | | `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
| `query` | -- | -- | The query engine options. |
| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
| `storage` | -- | -- | The data storage options. | | `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `./greptimedb_data/` | The working home directory. | | `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. | | `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |


@@ -243,12 +243,6 @@ overwrite_entry_start_id = false
# credential = "base64-credential" # credential = "base64-credential"
# endpoint = "https://storage.googleapis.com" # endpoint = "https://storage.googleapis.com"
## The query engine options.
[query]
## Parallelism of the query engine.
## Default to 0, which means the number of CPU cores.
parallelism = 0
## The data storage options. ## The data storage options.
[storage] [storage]
## The working home directory. ## The working home directory.


@@ -179,12 +179,6 @@ metadata_cache_ttl = "10m"
# TTI of the metadata cache. # TTI of the metadata cache.
metadata_cache_tti = "5m" metadata_cache_tti = "5m"
## The query engine options.
[query]
## Parallelism of the query engine.
## Default to 0, which means the number of CPU cores.
parallelism = 0
## Datanode options. ## Datanode options.
[datanode] [datanode]
## Datanode client options. ## Datanode client options.


@@ -50,10 +50,6 @@ use_memory_store = false
## - Using shared storage (e.g., s3). ## - Using shared storage (e.g., s3).
enable_region_failover = false enable_region_failover = false
## Whether to allow region failover on local WAL.
## **This option is not recommended to be set to true, because it may lead to data loss during failover.**
allow_region_failover_on_local_wal = false
## Max allowed idle time before removing node info from metasrv memory. ## Max allowed idle time before removing node info from metasrv memory.
node_max_idle_time = "24hours" node_max_idle_time = "24hours"
@@ -134,22 +130,6 @@ broker_endpoints = ["127.0.0.1:9092"]
## Otherwise, use topics named `topic_name_prefix_[0..num_topics)` ## Otherwise, use topics named `topic_name_prefix_[0..num_topics)`
auto_create_topics = true auto_create_topics = true
## Interval of automatically WAL pruning.
## Set to `0s` to disable automatically WAL pruning which delete unused remote WAL entries periodically.
auto_prune_interval = "0s"
## The threshold to trigger a flush operation of a region in automatically WAL pruning.
## Metasrv will send a flush request to flush the region when:
## `trigger_flush_threshold` + `prunable_entry_id` < `max_prunable_entry_id`
## where:
## - `prunable_entry_id` is the maximum entry id that can be pruned of the region.
## - `max_prunable_entry_id` is the maximum prunable entry id among all regions in the same topic.
## Set to `0` to disable the flush operation.
trigger_flush_threshold = 0
## Concurrent task limit for automatically WAL pruning.
auto_prune_parallelism = 10
## Number of topics. ## Number of topics.
num_topics = 64 num_topics = 64
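
The flush-trigger rule removed in the hunk above (`trigger_flush_threshold` + `prunable_entry_id` < `max_prunable_entry_id`) can be sanity-checked with a few made-up numbers; the sketch below only restates that inequality, and every value in it is illustrative rather than a real default.

```bash
#!/usr/bin/env bash
# Illustrative check of the WAL-pruning flush rule; the numbers are invented.
trigger_flush_threshold=100
prunable_entry_id=250        # highest entry id this region could prune
max_prunable_entry_id=400    # highest prunable entry id among regions in the same topic

if (( trigger_flush_threshold + prunable_entry_id < max_prunable_entry_id )); then
  # 100 + 250 = 350 < 400, so metasrv would ask this region to flush
  echo "flush request would be sent"
else
  echo "no flush needed"
fi
```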


@@ -334,12 +334,6 @@ max_running_procedures = 128
# credential = "base64-credential" # credential = "base64-credential"
# endpoint = "https://storage.googleapis.com" # endpoint = "https://storage.googleapis.com"
## The query engine options.
[query]
## Parallelism of the query engine.
## Default to 0, which means the number of CPU cores.
parallelism = 0
## The data storage options. ## The data storage options.
[storage] [storage]
## The working home directory. ## The working home directory.


@@ -1,6 +1,6 @@
# Profile memory usage of GreptimeDB # Profile memory usage of GreptimeDB
This crate provides an easy approach to dump memory profiling info. A set of ready to use scripts is provided in [docs/how-to/memory-profile-scripts](docs/how-to/memory-profile-scripts). This crate provides an easy approach to dump memory profiling info.
## Prerequisites ## Prerequisites
### jemalloc ### jemalloc


@@ -1,52 +0,0 @@
# Memory Analysis Process
This section will guide you through the process of analyzing memory usage for greptimedb.
1. Get the `jeprof` tool script, see the next section("Getting the `jeprof` tool") for details.
2. After starting `greptimedb`(with env var `MALLOC_CONF=prof:true`), execute the `dump.sh` script with the PID of the `greptimedb` process as an argument. This continuously monitors memory usage and captures profiles when exceeding thresholds (e.g. +20MB within 10 minutes). Outputs `greptime-{timestamp}.gprof` files.
3. With 2-3 gprof files, run `gen_flamegraph.sh` in the same environment to generate flame graphs showing memory allocation call stacks.
4. **NOTE:** The `gen_flamegraph.sh` script requires `jeprof` and optionally `flamegraph.pl` to be in the current directory. If needed to gen flamegraph now, run the `get_flamegraph_tool.sh` script, which downloads the flame graph generation tool `flamegraph.pl` to the current directory.
The usage of `gen_flamegraph.sh` is:
`Usage: ./gen_flamegraph.sh <binary_path> <gprof_directory>`
where `<binary_path>` is the path to the greptimedb binary, `<gprof_directory>` is the directory containing the gprof files(the directory `dump.sh` is dumping profiles to).
Example call: `./gen_flamegraph.sh ./greptime .`
Generating the flame graph might take a few minutes. The generated flame graphs are located in the `<gprof_directory>/flamegraphs` directory. Or if no `flamegraph.pl` is found, it will only contain `.collapse` files which is also fine.
5. You can send the generated flame graphs(the entire folder of `<gprof_directory>/flamegraphs`) to developers for further analysis.
## Getting the `jeprof` tool
there are three ways to get `jeprof`, list in here from simple to complex, using any one of those methods is ok, as long as it's the same environment as the `greptimedb` will be running on:
1. If you are compiling greptimedb from source, then `jeprof` is already produced during compilation. After running `cargo build`, execute `find_compiled_jeprof.sh`. This will copy `jeprof` to the current directory.
2. Or, if you have the Rust toolchain installed locally, simply follow these commands:
```bash
cargo new get_jeprof
cd get_jeprof
```
Then add this line to `Cargo.toml`:
```toml
[dependencies]
tikv-jemalloc-ctl = { version = "0.6", features = ["use_std", "stats"] }
```
then run:
```bash
cargo build
```
after that the `jeprof` tool is produced. Now run `find_compiled_jeprof.sh` in current directory, it will copy the `jeprof` tool to the current directory.
3. compile jemalloc from source
you can first clone this repo, and checkout to this commit:
```bash
git clone https://github.com/tikv/jemalloc.git
cd jemalloc
git checkout e13ca993e8ccb9ba9847cc330696e02839f328f7
```
then run:
```bash
./configure
make
```
and `jeprof` is in `.bin/` directory. Copy it to the current directory.
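
Tying the removed steps above together, one end-to-end run could look roughly like the sketch below; the binary path, the `standalone start` invocation, and the working directory are illustrative assumptions rather than part of the original instructions.

```bash
# Sketch of the profiling workflow described above (paths and commands are illustrative).
# jeprof is assumed to already be in the current directory, as described above.

# 1. Start greptimedb with jemalloc profiling enabled.
MALLOC_CONF=prof:true ./greptime standalone start &
GREPTIME_PID=$!

# 2. Monitor the process; dump.sh writes greptime-{timestamp}.gprof files
#    whenever memory grows past its threshold.
./dump.sh "$GREPTIME_PID" &

# 3. Once a few profiles exist, fetch flamegraph.pl and build the flame graphs.
./get_flamegraph_tool.sh
./gen_flamegraph.sh ./greptime .
# Outputs land in ./flamegraphs (or only .collapse files if flamegraph.pl is absent).
```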


@@ -1,78 +0,0 @@
#!/bin/bash
# Monitors greptime process memory usage every 10 minutes
# Triggers memory profile capture via `curl -X POST localhost:4000/debug/prof/mem > greptime-{timestamp}.gprof`
# when memory increases by more than 20MB since last check
# Generated profiles can be analyzed using flame graphs as described in `how-to-profile-memory.md`
# (jeprof is compiled with the database - see documentation)
# Alternative: Share binaries + profiles for analysis (Docker images preferred)
# Threshold in Kilobytes (20 MB)
threshold_kb=$((20 * 1024))
sleep_interval=$((10 * 60))
# Variable to store the last measured memory usage in KB
last_mem_kb=0
echo "Starting memory monitoring for 'greptime' process..."
while true; do
# Check if PID is provided as an argument
if [ -z "$1" ]; then
echo "$(date): PID must be provided as a command-line argument."
exit 1
fi
pid="$1"
# Validate that the PID is a number
if ! [[ "$pid" =~ ^[0-9]+$ ]]; then
echo "$(date): Invalid PID: '$pid'. PID must be a number."
exit 1
fi
# Get the current Resident Set Size (RSS) in Kilobytes
current_mem_kb=$(ps -o rss= -p "$pid")
# Check if ps command was successful and returned a number
if ! [[ "$current_mem_kb" =~ ^[0-9]+$ ]]; then
echo "$(date): Failed to get memory usage for PID $pid. Skipping check."
# Keep last_mem_kb to avoid false positives if the process briefly becomes unreadable.
continue
fi
echo "$(date): Current memory usage for PID $pid: ${current_mem_kb} KB"
# Compare with the last measurement
# if it's the first run, also do a baseline dump just to make sure we can dump
diff_kb=$((current_mem_kb - last_mem_kb))
echo "$(date): Memory usage change since last check: ${diff_kb} KB"
if [ "$diff_kb" -gt "$threshold_kb" ]; then
echo "$(date): Memory increase (${diff_kb} KB) exceeded threshold (${threshold_kb} KB). Dumping profile..."
timestamp=$(date +%Y%m%d%H%M%S)
profile_file="greptime-${timestamp}.gprof"
# Execute curl and capture output to file
if curl -sf -X POST localhost:4000/debug/prof/mem > "$profile_file"; then
echo "$(date): Memory profile saved to $profile_file"
else
echo "$(date): Failed to dump memory profile (curl exit code: $?)."
# Remove the potentially empty/failed profile file
rm -f "$profile_file"
fi
else
echo "$(date): Memory increase (${diff_kb} KB) is within the threshold (${threshold_kb} KB)."
fi
# Update the last memory usage
last_mem_kb=$current_mem_kb
# Wait for 5 minutes
echo "$(date): Sleeping for $sleep_interval seconds..."
sleep $sleep_interval
done
echo "Memory monitoring script stopped." # This line might not be reached in normal operation


@@ -1,15 +0,0 @@
#!/bin/bash
# Locates compiled jeprof binary (memory analysis tool) after cargo build
# Copies it to current directory from target/ build directories
JPROF_PATH=$(find . -name 'jeprof' -print -quit)
if [ -n "$JPROF_PATH" ]; then
echo "Found jeprof at $JPROF_PATH"
cp "$JPROF_PATH" .
chmod +x jeprof
echo "Copied jeprof to current directory and made it executable."
else
echo "jeprof not found"
exit 1
fi


@@ -1,89 +0,0 @@
#!/bin/bash
# Generate flame graphs from a series of `.gprof` files
# First argument: Path to the binary executable
# Second argument: Path to directory containing gprof files
# Requires `jeprof` and `flamegraph.pl` in current directory
# What this script essentially does is:
# ./jeprof <binary> <gprof> --collapse | ./flamegraph.pl > <output>
# For differential analysis between consecutive profiles:
# ./jeprof <binary> --base <gprof1> <gprof2> --collapse | ./flamegraph.pl > <output_diff>
set -e # Exit immediately if a command exits with a non-zero status.
# Check for required tools
if [ ! -f "./jeprof" ]; then
echo "Error: jeprof not found in the current directory."
exit 1
fi
if [ ! -f "./flamegraph.pl" ]; then
echo "Error: flamegraph.pl not found in the current directory."
exit 1
fi
# Check arguments
if [ "$#" -ne 2 ]; then
echo "Usage: $0 <binary_path> <gprof_directory>"
exit 1
fi
BINARY_PATH=$1
GPROF_DIR=$2
OUTPUT_DIR="${GPROF_DIR}/flamegraphs" # Store outputs in a subdirectory
if [ ! -f "$BINARY_PATH" ]; then
echo "Error: Binary file not found at $BINARY_PATH"
exit 1
fi
if [ ! -d "$GPROF_DIR" ]; then
echo "Error: gprof directory not found at $GPROF_DIR"
exit 1
fi
mkdir -p "$OUTPUT_DIR"
echo "Generating flamegraphs in $OUTPUT_DIR"
# Find and sort gprof files
# Use find + sort -V for natural sort of version numbers if present in filenames
# Use null-terminated strings for safety with find/xargs/sort
mapfile -d $'\0' gprof_files < <(find "$GPROF_DIR" -maxdepth 1 -name '*.gprof' -print0 | sort -zV)
if [ ${#gprof_files[@]} -eq 0 ]; then
echo "No .gprof files found in $GPROF_DIR"
exit 0
fi
prev_gprof=""
# Generate flamegraphs
for gprof_file in "${gprof_files[@]}"; do
# Skip empty entries if any
if [ -z "$gprof_file" ]; then
continue
fi
filename=$(basename "$gprof_file" .gprof)
output_collapse="${OUTPUT_DIR}/${filename}.collapse"
output_svg="${OUTPUT_DIR}/${filename}.svg"
echo "Generating collapse file for $gprof_file -> $output_collapse"
./jeprof "$BINARY_PATH" "$gprof_file" --collapse > "$output_collapse"
echo "Generating flamegraph for $gprof_file -> $output_svg"
./flamegraph.pl "$output_collapse" > "$output_svg" || true
# Generate diff flamegraph if not the first file
if [ -n "$prev_gprof" ]; then
prev_filename=$(basename "$prev_gprof" .gprof)
diff_output_collapse="${OUTPUT_DIR}/${prev_filename}_vs_${filename}_diff.collapse"
diff_output_svg="${OUTPUT_DIR}/${prev_filename}_vs_${filename}_diff.svg"
echo "Generating diff collapse file for $prev_gprof vs $gprof_file -> $diff_output_collapse"
./jeprof "$BINARY_PATH" --base "$prev_gprof" "$gprof_file" --collapse > "$diff_output_collapse"
echo "Generating diff flamegraph for $prev_gprof vs $gprof_file -> $diff_output_svg"
./flamegraph.pl "$diff_output_collapse" > "$diff_output_svg" || true
fi
prev_gprof="$gprof_file"
done
echo "Flamegraph generation complete."


@@ -1,44 +0,0 @@
#!/bin/bash
# Generate flame graphs from .collapse files
# Argument: Path to directory containing collapse files
# Requires `flamegraph.pl` in current directory
# Check if flamegraph.pl exists
if [ ! -f "./flamegraph.pl" ]; then
echo "Error: flamegraph.pl not found in the current directory."
exit 1
fi
# Check if directory argument is provided
if [ -z "$1" ]; then
echo "Usage: $0 <collapse_directory>"
exit 1
fi
COLLAPSE_DIR=$1
# Check if the provided argument is a directory
if [ ! -d "$COLLAPSE_DIR" ]; then
echo "Error: '$COLLAPSE_DIR' is not a valid directory."
exit 1
fi
echo "Generating flame graphs from collapse files in '$COLLAPSE_DIR'..."
# Find and process each .collapse file
find "$COLLAPSE_DIR" -maxdepth 1 -name "*.collapse" -print0 | while IFS= read -r -d $'\0' collapse_file; do
if [ -f "$collapse_file" ]; then
# Construct the output SVG filename
svg_file="${collapse_file%.collapse}.svg"
echo "Generating $svg_file from $collapse_file..."
./flamegraph.pl "$collapse_file" > "$svg_file"
if [ $? -ne 0 ]; then
echo "Error generating flame graph for $collapse_file"
else
echo "Successfully generated $svg_file"
fi
fi
done
echo "Flame graph generation complete."


@@ -1,6 +0,0 @@
#!/bin/bash
# Download flamegraph.pl to current directory - this is the flame graph generation tool script
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
chmod +x ./flamegraph.pl


@@ -1,89 +1,61 @@
# Grafana dashboards for GreptimeDB Grafana dashboard for GreptimeDB
--------------------------------
## Overview GreptimeDB's official Grafana dashboard.
This repository maintains the Grafana dashboards for GreptimeDB. It has two types of dashboards: Status notify: we are still working on this config. It's expected to change frequently in the recent days. Please feel free to submit your feedback and/or contribution to this dashboard 🤗
- `cluster/dashboard.json`: The Grafana dashboard for the GreptimeDB cluster. Read the [dashboard.md](./dashboards/cluster/dashboard.md) for more details. If you use Helm [chart](https://github.com/GreptimeTeam/helm-charts) to deploy GreptimeDB cluster, you can enable self-monitoring by setting the following values in your Helm chart:
- `standalone/dashboard.json`: The Grafana dashboard for the standalone GreptimeDB instance. **It's generated from the `cluster/dashboard.json` by removing the instance filter through the `make dashboards` command**. Read the [dashboard.md](./dashboards/standalone/dashboard.md) for more details.
As the rapid development of GreptimeDB, the metrics may be changed, and please feel free to submit your feedback and/or contribution to this dashboard 🤗
**NOTE**:
- The Grafana version should be greater than 9.0.
- If you want to modify the dashboards, you only need to modify the `cluster/dashboard.json` and run the `make dashboards` command to generate the `standalone/dashboard.json` and other related files.
To maintain the dashboards easily, we use the [`dac`](https://github.com/zyy17/dac) tool to generate the intermediate dashboards and markdown documents:
- `cluster/dashboard.yaml`: The intermediate dashboard for the GreptimeDB cluster.
- `standalone/dashboard.yaml`: The intermediate dashboard for the standalone GreptimeDB instance.
## Data Sources
There are two data sources for the dashboards to fetch the metrics:
- **Prometheus**: Expose the metrics of GreptimeDB.
- **Information Schema**: It is the MySQL port of the current monitored instance. The `overview` dashboard will use this datasource to show the information schema of the current instance.
## Instance Filters
To deploy the dashboards for multiple scenarios (K8s, bare metal, etc.), we prefer to use the `instance` label when filtering instances.
Additionally, we recommend including the `pod` label in the legend to make it easier to identify each instance, even though this field will be empty in bare metal scenarios.
For example, the following query is recommended:
```promql
sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)
```
And the legend will be like: `[{{instance}}]-[{{ pod }}]`.
## Deployment
### Helm
If you use the Helm [chart](https://github.com/GreptimeTeam/helm-charts) to deploy a GreptimeDB cluster, you can enable self-monitoring by setting the following values in your Helm chart:
- `monitoring.enabled=true`: Deploys a standalone GreptimeDB instance dedicated to monitoring the cluster; - `monitoring.enabled=true`: Deploys a standalone GreptimeDB instance dedicated to monitoring the cluster;
- `grafana.enabled=true`: Deploys Grafana and automatically imports the monitoring dashboard; - `grafana.enabled=true`: Deploys Grafana and automatically imports the monitoring dashboard;
The standalone GreptimeDB instance will collect metrics from your cluster, and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/nightly/user-guide/deployments/deploy-on-kubernetes/getting-started). The standalone GreptimeDB instance will collect metrics from your cluster and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/nightly/user-guide/deployments/deploy-on-kubernetes/getting-started).
### Self-host Prometheus and import dashboards manually # How to use
1. **Configure Prometheus to scrape the cluster** ## `greptimedb.json`
The following is an example configuration(**Please modify it according to your actual situation**): Open Grafana Dashboard page, choose `New` -> `Import`. And upload `greptimedb.json` file.
```yml ## `greptimedb-cluster.json`
# example config
# only to indicate how to assign labels to each target
# modify yours accordingly
scrape_configs:
- job_name: metasrv
static_configs:
- targets: ['<metasrv-ip>:<port>']
- job_name: datanode This cluster dashboard provides a comprehensive view of incoming requests, response statuses, and internal activities such as flush and compaction, with a layered structure from frontend to datanode. Designed with a focus on alert functionality, its primary aim is to highlight any anomalies in metrics, allowing users to quickly pinpoint the cause of errors.
static_configs:
- targets: ['<datanode0-ip>:<port>', '<datanode1-ip>:<port>', '<datanode2-ip>:<port>']
- job_name: frontend We use Prometheus to scrape off metrics from nodes in GreptimeDB cluster, Grafana to visualize the diagram. Any compatible stack should work too.
static_configs:
- targets: ['<frontend-ip>:<port>']
```
2. **Configure the data sources in Grafana** __Note__: This dashboard is still in an early stage of development. Any issue or advice on improvement is welcomed.
You need to add two data sources in Grafana: ### Configuration
- Prometheus: It is the Prometheus instance that scrapes the GreptimeDB metrics. Please ensure the following configuration before importing the dashboard into Grafana.
- Information Schema: It is the MySQL port of the current monitored instance. The dashboard will use this datasource to show the information schema of the current instance.
3. **Import the dashboards based on your deployment scenario** __1. Prometheus scrape config__
- **Cluster**: Import the `cluster/dashboard.json` dashboard. Configure Prometheus to scrape the cluster.
- **Standalone**: Import the `standalone/dashboard.json` dashboard.
```yml
# example config
# only to indicate how to assign labels to each target
# modify yours accordingly
scrape_configs:
- job_name: metasrv
static_configs:
- targets: ['<metasrv-ip>:<port>']
- job_name: datanode
static_configs:
- targets: ['<datanode0-ip>:<port>', '<datanode1-ip>:<port>', '<datanode2-ip>:<port>']
- job_name: frontend
static_configs:
- targets: ['<frontend-ip>:<port>']
```
__2. Grafana config__
Create a Prometheus data source in Grafana before using this dashboard. We use `datasource` as a variable in Grafana dashboard so that multiple environments are supported.
### Usage
Use `datasource` or `instance` on the upper-left corner to filter data from certain node.
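
The Helm-based self-monitoring setup mentioned in the README diff above (`monitoring.enabled=true` plus `grafana.enabled=true`) could be applied with something like the following; the repository URL, chart name, release name, and namespace are assumptions, not values taken from this change.

```bash
# Hypothetical example of enabling cluster self-monitoring via the Helm chart.
# Repo URL, chart name, release name, and namespace below are assumptions.
helm repo add greptime https://greptimeteam.github.io/helm-charts/
helm repo update
helm upgrade --install mycluster greptime/greptimedb-cluster \
  --namespace greptimedb --create-namespace \
  --set monitoring.enabled=true \
  --set grafana.enabled=true
```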

grafana/check.sh (new executable file, 19 lines added)

@@ -0,0 +1,19 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
# Use jq to check for panels with empty or missing descriptions
invalid_panels=$(cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels[]
| select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))
')
# Check if any invalid panels were found
if [[ -n "$invalid_panels" ]]; then
echo "Error: The following panels have empty or missing descriptions:"
echo "$invalid_panels"
exit 1
else
echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
exit 0
fi

File diff suppressed because it is too large.


@@ -1,97 +0,0 @@
# Overview
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Uptime | `time() - process_start_time_seconds` | `stat` | The start time of GreptimeDB. | `prometheus` | `s` | `__auto` |
| Version | `SELECT pkg_version FROM information_schema.build_info` | `stat` | GreptimeDB version. | `mysql` | -- | -- |
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))` | `stat` | Total ingestion rate. | `prometheus` | `rowsps` | `__auto` |
| Total Storage Size | `select SUM(disk_size) from information_schema.region_statistics;` | `stat` | Total number of data file size. | `mysql` | `decbytes` | -- |
| Total Rows | `select SUM(region_rows) from information_schema.region_statistics;` | `stat` | Total number of data rows in the cluster. Calculated by sum of rows from each region. | `mysql` | `sishort` | -- |
| Deployment | `SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';`<br/>`SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';`<br/>`SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';`<br/>`SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';` | `stat` | The deployment topology of GreptimeDB. | `mysql` | -- | -- |
| Database Resources | `SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')`<br/>`SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'`<br/>`SELECT COUNT(region_id) as regions FROM information_schema.region_peers`<br/>`SELECT COUNT(*) as flows FROM information_schema.flows` | `stat` | The number of the key resources in GreptimeDB. | `mysql` | -- | -- |
| Data Size | `SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;`<br/>`SELECT SUM(index_size) as index FROM information_schema.region_statistics;`<br/>`SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;` | `stat` | The data size of wal/index/manifest in the GreptimeDB. | `mysql` | `decbytes` | -- |
# Ingestion
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `ingestion` |
| Ingestion Rate by Type | `sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))`<br/>`sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `http-logs` |
# Queries
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Total Query Rate | `sum (rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_http_promql_elapsed_counte{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Total rate of query API calls by protocol. This metric is collected from frontends.<br/><br/>Here we listed 3 main protocols:<br/>- MySQL<br/>- Postgres<br/>- Prometheus API<br/><br/>Note that some other minor query APIs, such as /sql, are not included | `prometheus` | `reqps` | `mysql` |
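
The PromQL expressions in these tables use Grafana variables such as `$__rate_interval` and `$frontend`; to test one outside Grafana, substitute a literal range and selector. A minimal sketch, assuming a Prometheus server scraping GreptimeDB is reachable at `http://localhost:9090` (a hypothetical address):

```bash
# Query the total MySQL query rate directly from the Prometheus HTTP API.
curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=sum(rate(greptime_servers_mysql_query_elapsed_count[5m]))' \
  | jq '.data.result'
```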
# Resources
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Datanode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{instance}}]-[{{ pod }}]` |
| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
| Frontend Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
| Flownode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
# Frontend Requests
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| HTTP QPS per Instance | `sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{instance=~"$frontend",path!~"/health\|/metrics"}[$__rate_interval]))` | `timeseries` | HTTP QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]` |
| HTTP P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{instance=~"$frontend",path!~"/health\|/metrics"}[$__rate_interval])))` | `timeseries` | HTTP P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
| gRPC QPS per Instance | `sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | gRPC QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]` |
| gRPC P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | gRPC P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
| MySQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | MySQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
| MySQL P99 per Instance | `histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | MySQL P99 per Instance. | `prometheus` | `s` | `[{{ instance }}]-[{{ pod }}]-p99` |
| PostgreSQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | PostgreSQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
| PostgreSQL P99 per Instance | `histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | PostgreSQL P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
# Frontend to Datanode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Ingest Rows per Instance | `sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Ingestion rate in rows as seen on each frontend | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
| Region Call QPS per Instance | `sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Region Call QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
| Region Call P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | Region Call P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
# Mito Engine
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Request OPS per Instance | `sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Request QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Write Buffer per Instance | `greptime_mito_write_buffer_bytes{instance=~"$datanode"}` | `timeseries` | Write Buffer per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]` |
| Write Rows per Instance | `sum by (instance, pod) (rate(greptime_mito_write_rows_total{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Ingestion size by row counts. | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
| Flush OPS per Instance | `sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Flush QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{reason}}]` |
| Write Stall per Instance | `sum by(instance, pod) (greptime_mito_write_stall_total{instance=~"$datanode"})` | `timeseries` | Write Stall per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
| Read Stage OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{instance=~"$datanode", stage="total"}[$__rate_interval]))` | `timeseries` | Read Stage OPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]` |
| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
| Cached Bytes per Instance | `greptime_mito_cache_bytes{instance=~"$datanode"}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Inflight Compaction | `greptime_mito_inflight_compaction_count` | `timeseries` | Ongoing compaction task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
| WAL sync duration seconds | `histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))` | `timeseries` | Raft engine (local disk) log store sync latency, p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
| Log Store op duration seconds | `histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))` | `timeseries` | Write-ahead log operations latency at p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99` |
| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
# OpenDAL
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode",operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Total traffic in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode", error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
# Metasrv
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `state-timeline` | Counter of region migration by source and destination | `prometheus` | `none` | `from-datanode-{{datanode_id}}` |
| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `prometheus` | `none` | `__auto` |
| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `__auto` |
# Flownode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | `prometheus` | -- | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-p95` |
| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |


@@ -1,769 +0,0 @@
groups:
- title: Overview
panels:
- title: Uptime
type: stat
description: The start time of GreptimeDB.
unit: s
queries:
- expr: time() - process_start_time_seconds
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Version
type: stat
description: GreptimeDB version.
queries:
- expr: SELECT pkg_version FROM information_schema.build_info
datasource:
type: mysql
uid: ${information_schema}
- title: Total Ingestion Rate
type: stat
description: Total ingestion rate.
unit: rowsps
queries:
- expr: sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Total Storage Size
type: stat
description: Total size of data files.
unit: decbytes
queries:
- expr: select SUM(disk_size) from information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Total Rows
type: stat
description: Total number of data rows in the cluster. Calculated by sum of rows from each region.
unit: sishort
queries:
- expr: select SUM(region_rows) from information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Deployment
type: stat
description: The deployment topology of GreptimeDB.
queries:
- expr: SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';
datasource:
type: mysql
uid: ${information_schema}
- title: Database Resources
type: stat
description: The number of the key resources in GreptimeDB.
queries:
- expr: SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(region_id) as regions FROM information_schema.region_peers
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(*) as flows FROM information_schema.flows
datasource:
type: mysql
uid: ${information_schema}
- title: Data Size
type: stat
description: The data size of wal/index/manifest in the GreptimeDB.
unit: decbytes
queries:
- expr: SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT SUM(index_size) as index FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Ingestion
panels:
- title: Total Ingestion Rate
type: timeseries
description: |
Total ingestion rate.
Here we listed 3 primary protocols:
- Prometheus remote write
- Greptime's gRPC API (when using our ingest SDK)
- Log ingestion http API
unit: rowsps
queries:
- expr: sum(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: ingestion
- title: Ingestion Rate by Type
type: timeseries
description: |
Total ingestion rate.
Here we listed 3 primary protocols:
- Prometheus remote write
- Greptime's gRPC API (when using our ingest SDK)
- Log ingestion http API
unit: rowsps
queries:
- expr: sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: http-logs
- expr: sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: prometheus-remote-write
- title: Queries
panels:
- title: Total Query Rate
type: timeseries
description: |-
Total rate of query API calls by protocol. This metric is collected from frontends.
Here we listed 3 main protocols:
- MySQL
- Postgres
- Prometheus API
Note that some other minor query APIs, such as /sql, are not included
unit: reqps
queries:
- expr: sum (rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: mysql
- expr: sum (rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: pg
- expr: sum (rate(greptime_servers_http_promql_elapsed_counte{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: promql
- title: Resources
panels:
- title: Datanode Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{ pod }}]'
- title: Datanode CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
- title: Metasrv Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
- title: Metasrv CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Flownode Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Flownode CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend Requests
panels:
- title: HTTP QPS per Instance
type: timeseries
description: HTTP QPS per Instance.
unit: reqps
queries:
- expr: sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{instance=~"$frontend",path!~"/health|/metrics"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]'
- title: HTTP P99 per Instance
type: timeseries
description: HTTP P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{instance=~"$frontend",path!~"/health|/metrics"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
- title: gRPC QPS per Instance
type: timeseries
description: gRPC QPS per Instance.
unit: reqps
queries:
- expr: sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]'
- title: gRPC P99 per Instance
type: timeseries
description: gRPC P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
- title: MySQL QPS per Instance
type: timeseries
description: MySQL QPS per Instance.
unit: reqps
queries:
- expr: sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: MySQL P99 per Instance
type: timeseries
description: MySQL P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-p99'
- title: PostgreSQL QPS per Instance
type: timeseries
description: PostgreSQL QPS per Instance.
unit: reqps
queries:
- expr: sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: PostgreSQL P99 per Instance
type: timeseries
description: PostgreSQL P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Frontend to Datanode
panels:
- title: Ingest Rows per Instance
type: timeseries
description: Ingestion rate in rows as seen on each frontend
unit: rowsps
queries:
- expr: sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Region Call QPS per Instance
type: timeseries
description: Region Call QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{instance=~"$frontend"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
- title: Region Call P99 per Instance
type: timeseries
description: Region Call P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{instance=~"$frontend"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
- title: Mito Engine
panels:
- title: Request OPS per Instance
type: timeseries
description: Request QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Request P99 per Instance
type: timeseries
description: Request P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Write Buffer per Instance
type: timeseries
description: Write Buffer per Instance.
unit: decbytes
queries:
- expr: greptime_mito_write_buffer_bytes{instance=~"$datanode"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Write Rows per Instance
type: timeseries
description: Ingestion size by row counts.
unit: rowsps
queries:
- expr: sum by (instance, pod) (rate(greptime_mito_write_rows_total{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Flush OPS per Instance
type: timeseries
description: Flush QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{reason}}]'
- title: Write Stall per Instance
type: timeseries
description: Write Stall per Instance.
queries:
- expr: sum by(instance, pod) (greptime_mito_write_stall_total{instance=~"$datanode"})
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Read Stage OPS per Instance
type: timeseries
description: Read Stage OPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{instance=~"$datanode", stage="total"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Read Stage P99 per Instance
type: timeseries
description: Read Stage P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
- title: Write Stage P99 per Instance
type: timeseries
description: Write Stage P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
- title: Compaction OPS per Instance
type: timeseries
description: Compaction OPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{pod}}]'
- title: Compaction P99 per Instance by Stage
type: timeseries
description: Compaction latency by stage
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
- title: Compaction P99 per Instance
type: timeseries
description: Compaction P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction'
- title: WAL write size
type: timeseries
description: Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate.
unit: bytes
queries:
- expr: histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p95'
- expr: histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p99'
- expr: sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-throughput'
- title: Cached Bytes per Instance
type: timeseries
description: Cached Bytes per Instance.
unit: decbytes
queries:
- expr: greptime_mito_cache_bytes{instance=~"$datanode"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Inflight Compaction
type: timeseries
description: Ongoing compaction task count
unit: none
queries:
- expr: greptime_mito_inflight_compaction_count
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: WAL sync duration seconds
type: timeseries
description: Raft engine (local disk) log store sync latency, p99
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Log Store op duration seconds
type: timeseries
description: Write-ahead log operations latency at p99
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99'
- title: Inflight Flush
type: timeseries
description: Ongoing flush task count
unit: none
queries:
- expr: greptime_mito_inflight_flush_count
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: OpenDAL
panels:
- title: QPS per Instance
type: timeseries
description: QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Read QPS per Instance
type: timeseries
description: Read QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: Read P99 per Instance
type: timeseries
description: Read P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
- title: Write QPS per Instance
type: timeseries
description: Write QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
- title: Write P99 per Instance
type: timeseries
description: Write P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: List QPS per Instance
type: timeseries
description: List QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="list"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: List P99 per Instance
type: timeseries
description: List P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="list"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: Other Requests per Instance
type: timeseries
description: Other Requests per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode",operation!~"read|write|list|stat"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Other Request P99 per Instance
type: timeseries
description: Other Request P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read|write|list"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Opendal traffic
type: timeseries
description: Total traffic in bytes by instance and operation
unit: decbytes
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: OpenDAL errors per Instance
type: timeseries
description: OpenDAL error counts per Instance.
queries:
- expr: sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode", error!="NotFound"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
- title: Metasrv
panels:
- title: Region migration datanode
type: state-timeline
description: Counter of region migration by source and destination
unit: none
queries:
- expr: greptime_meta_region_migration_stat{datanode_type="src"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: from-datanode-{{datanode_id}}
- expr: greptime_meta_region_migration_stat{datanode_type="desc"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: to-datanode-{{datanode_id}}
- title: Region migration error
type: timeseries
description: Counter of region migration error
unit: none
queries:
- expr: greptime_meta_region_migration_error
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Datanode load
type: timeseries
description: Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads.
unit: none
queries:
- expr: greptime_datanode_load
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Flownode
panels:
- title: Flow Ingest / Output Rate
type: timeseries
description: Flow Ingest / Output Rate.
queries:
- expr: sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{pod}}]-[{{instance}}]-[{{direction}}]'
- title: Flow Ingest Latency
type: timeseries
description: Flow Ingest Latency.
queries:
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p95'
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Flow Operation Latency
type: timeseries
description: Flow Operation Latency.
queries:
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p95'
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p99'
- title: Flow Buffer Size per Instance
type: timeseries
description: Flow Buffer Size per Instance.
queries:
- expr: greptime_flow_input_buf_size
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Flow Processing Error per Instance
type: timeseries
description: Flow Processing Error per Instance.
queries:
- expr: sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'
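
The description rule enforced by the check script can also be applied directly to this intermediate YAML. A minimal sketch, assuming mikefarah's `yq` (v4) and `jq` are installed and the definition above is saved as `dashboard.yaml` (a hypothetical path):

```bash
# List any stat/timeseries panel in the YAML definition that lacks a description.
yq -o=json '.' dashboard.yaml | jq -r '
  .groups[].panels[]
  | select((.type == "stat" or .type == "timeseries")
           and ((.description // "") == ""))
  | .title
'
```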

File diff suppressed because it is too large


@@ -1,97 +0,0 @@
# Overview
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Uptime | `time() - process_start_time_seconds` | `stat` | The start time of GreptimeDB. | `prometheus` | `s` | `__auto` |
| Version | `SELECT pkg_version FROM information_schema.build_info` | `stat` | GreptimeDB version. | `mysql` | -- | -- |
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))` | `stat` | Total ingestion rate. | `prometheus` | `rowsps` | `__auto` |
| Total Storage Size | `select SUM(disk_size) from information_schema.region_statistics;` | `stat` | Total size of data files. | `mysql` | `decbytes` | -- |
| Total Rows | `select SUM(region_rows) from information_schema.region_statistics;` | `stat` | Total number of data rows in the cluster. Calculated by sum of rows from each region. | `mysql` | `sishort` | -- |
| Deployment | `SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';`<br/>`SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';`<br/>`SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';`<br/>`SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';` | `stat` | The deployment topology of GreptimeDB. | `mysql` | -- | -- |
| Database Resources | `SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')`<br/>`SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'`<br/>`SELECT COUNT(region_id) as regions FROM information_schema.region_peers`<br/>`SELECT COUNT(*) as flows FROM information_schema.flows` | `stat` | The number of the key resources in GreptimeDB. | `mysql` | -- | -- |
| Data Size | `SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;`<br/>`SELECT SUM(index_size) as index FROM information_schema.region_statistics;`<br/>`SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;` | `stat` | The data size of wal/index/manifest in the GreptimeDB. | `mysql` | `decbytes` | -- |
# Ingestion
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `ingestion` |
| Ingestion Rate by Type | `sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))`<br/>`sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `http-logs` |
# Queries
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Total Query Rate | `sum (rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_http_promql_elapsed_counte{}[$__rate_interval]))` | `timeseries` | Total rate of query API calls by protocol. This metric is collected from frontends.<br/><br/>Here we listed 3 main protocols:<br/>- MySQL<br/>- Postgres<br/>- Prometheus API<br/><br/>Note that some other minor query APIs, such as /sql, are not included | `prometheus` | `reqps` | `mysql` |
# Resources
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Datanode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{instance}}]-[{{ pod }}]` |
| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
| Frontend Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
| Flownode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
# Frontend Requests
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| HTTP QPS per Instance | `sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{path!~"/health\|/metrics"}[$__rate_interval]))` | `timeseries` | HTTP QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]` |
| HTTP P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{path!~"/health\|/metrics"}[$__rate_interval])))` | `timeseries` | HTTP P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
| gRPC QPS per Instance | `sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{}[$__rate_interval]))` | `timeseries` | gRPC QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]` |
| gRPC P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | gRPC P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
| MySQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))` | `timeseries` | MySQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
| MySQL P99 per Instance | `histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | MySQL P99 per Instance. | `prometheus` | `s` | `[{{ instance }}]-[{{ pod }}]-p99` |
| PostgreSQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))` | `timeseries` | PostgreSQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
| PostgreSQL P99 per Instance | `histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | PostgreSQL P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
# Frontend to Datanode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Ingest Rows per Instance | `sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))` | `timeseries` | Ingestion rate in rows as seen on each frontend | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
| Region Call QPS per Instance | `sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{}[$__rate_interval]))` | `timeseries` | Region Call QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
| Region Call P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{}[$__rate_interval])))` | `timeseries` | Region Call P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
# Mito Engine
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Request OPS per Instance | `sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{}[$__rate_interval]))` | `timeseries` | Request QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Write Buffer per Instance | `greptime_mito_write_buffer_bytes{}` | `timeseries` | Write Buffer per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]` |
| Write Rows per Instance | `sum by (instance, pod) (rate(greptime_mito_write_rows_total{}[$__rate_interval]))` | `timeseries` | Ingestion size by row counts. | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
| Flush OPS per Instance | `sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{}[$__rate_interval]))` | `timeseries` | Flush QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{reason}}]` |
| Write Stall per Instance | `sum by(instance, pod) (greptime_mito_write_stall_total{})` | `timeseries` | Write Stall per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
| Read Stage OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{ stage="total"}[$__rate_interval]))` | `timeseries` | Read Stage OPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]` |
| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
| Cached Bytes per Instance | `greptime_mito_cache_bytes{}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
| Inflight Compaction | `greptime_mito_inflight_compaction_count` | `timeseries` | Ongoing compaction task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
| WAL sync duration seconds | `histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))` | `timeseries` | Raft engine (local disk) log store sync latency, p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
| Log Store op duration seconds | `histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))` | `timeseries` | Write-ahead log operations latency at p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99` |
| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
# OpenDAL
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
| Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{ error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
# Metasrv
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `state-timeline` | Counter of region migration by source and destination | `prometheus` | `none` | `from-datanode-{{datanode_id}}` |
| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `prometheus` | `none` | `__auto` |
| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `__auto` |
# Flownode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | `prometheus` | -- | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-p95` |
| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |
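
The Metasrv and Flownode expressions can be spot-checked the same way; `promtool` (shipped with Prometheus) is a convenient alternative to raw HTTP calls. This is a minimal sketch under the same assumptions: the server URL is a placeholder and `$__rate_interval` is replaced by a fixed window.

```bash
#!/usr/bin/env bash
# Illustrative only: evaluate a few of the panel expressions with promtool.
PROM_URL="http://localhost:9090"

exprs=(
  'greptime_meta_region_migration_error'
  'greptime_flow_input_buf_size'
  'sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[5m]))'
)

for e in "${exprs[@]}"; do
  echo ">>> ${e}"
  promtool query instant "${PROM_URL}" "${e}"
done
```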

View File

@@ -1,769 +0,0 @@
groups:
- title: Overview
panels:
- title: Uptime
type: stat
description: The start time of GreptimeDB.
unit: s
queries:
- expr: time() - process_start_time_seconds
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Version
type: stat
description: GreptimeDB version.
queries:
- expr: SELECT pkg_version FROM information_schema.build_info
datasource:
type: mysql
uid: ${information_schema}
- title: Total Ingestion Rate
type: stat
description: Total ingestion rate.
unit: rowsps
queries:
- expr: sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Total Storage Size
type: stat
description: Total number of data file size.
unit: decbytes
queries:
- expr: select SUM(disk_size) from information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Total Rows
type: stat
description: Total number of data rows in the cluster. Calculated by sum of rows from each region.
unit: sishort
queries:
- expr: select SUM(region_rows) from information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Deployment
type: stat
description: The deployment topology of GreptimeDB.
queries:
- expr: SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';
datasource:
type: mysql
uid: ${information_schema}
- title: Database Resources
type: stat
description: The number of the key resources in GreptimeDB.
queries:
- expr: SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(region_id) as regions FROM information_schema.region_peers
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT COUNT(*) as flows FROM information_schema.flows
datasource:
type: mysql
uid: ${information_schema}
- title: Data Size
type: stat
description: The data size of wal/index/manifest in the GreptimeDB.
unit: decbytes
queries:
- expr: SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT SUM(index_size) as index FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- expr: SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;
datasource:
type: mysql
uid: ${information_schema}
- title: Ingestion
panels:
- title: Total Ingestion Rate
type: timeseries
description: |
Total ingestion rate.
Here we listed 3 primary protocols:
- Prometheus remote write
- Greptime's gRPC API (when using our ingest SDK)
- Log ingestion http API
unit: rowsps
queries:
- expr: sum(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: ingestion
- title: Ingestion Rate by Type
type: timeseries
description: |
Total ingestion rate.
Here we listed 3 primary protocols:
- Prometheus remote write
- Greptime's gRPC API (when using our ingest SDK)
- Log ingestion http API
unit: rowsps
queries:
- expr: sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: http-logs
- expr: sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: prometheus-remote-write
- title: Queries
panels:
- title: Total Query Rate
type: timeseries
description: |-
Total rate of query API calls by protocol. This metric is collected from frontends.
Here we listed 3 main protocols:
- MySQL
- Postgres
- Prometheus API
Note that there are some other minor query APIs like /sql are not included
unit: reqps
queries:
- expr: sum (rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: mysql
- expr: sum (rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: pg
- expr: sum (rate(greptime_servers_http_promql_elapsed_counte{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: promql
- title: Resources
panels:
- title: Datanode Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{ pod }}]'
- title: Datanode CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
- title: Metasrv Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
- title: Metasrv CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Flownode Memory per Instance
type: timeseries
description: Current memory usage by instance
unit: decbytes
queries:
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Flownode CPU Usage per Instance
type: timeseries
description: Current cpu usage by instance
unit: none
queries:
- expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]'
- title: Frontend Requests
panels:
- title: HTTP QPS per Instance
type: timeseries
description: HTTP QPS per Instance.
unit: reqps
queries:
- expr: sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{path!~"/health|/metrics"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]'
- title: HTTP P99 per Instance
type: timeseries
description: HTTP P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{path!~"/health|/metrics"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
- title: gRPC QPS per Instance
type: timeseries
description: gRPC QPS per Instance.
unit: reqps
queries:
- expr: sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]'
- title: gRPC P99 per Instance
type: timeseries
description: gRPC P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
- title: MySQL QPS per Instance
type: timeseries
description: MySQL QPS per Instance.
unit: reqps
queries:
- expr: sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: MySQL P99 per Instance
type: timeseries
description: MySQL P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{ pod }}]-p99'
- title: PostgreSQL QPS per Instance
type: timeseries
description: PostgreSQL QPS per Instance.
unit: reqps
queries:
- expr: sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: PostgreSQL P99 per Instance
type: timeseries
description: PostgreSQL P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Frontend to Datanode
panels:
- title: Ingest Rows per Instance
type: timeseries
description: Ingestion rate by row as in each frontend
unit: rowsps
queries:
- expr: sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Region Call QPS per Instance
type: timeseries
description: Region Call QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
- title: Region Call P99 per Instance
type: timeseries
description: Region Call P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
- title: Mito Engine
panels:
- title: Request OPS per Instance
type: timeseries
description: Request QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Request P99 per Instance
type: timeseries
description: Request P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Write Buffer per Instance
type: timeseries
description: Write Buffer per Instance.
unit: decbytes
queries:
- expr: greptime_mito_write_buffer_bytes{}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Write Rows per Instance
type: timeseries
description: Ingestion size by row counts.
unit: rowsps
queries:
- expr: sum by (instance, pod) (rate(greptime_mito_write_rows_total{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Flush OPS per Instance
type: timeseries
description: Flush QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{reason}}]'
- title: Write Stall per Instance
type: timeseries
description: Write Stall per Instance.
queries:
- expr: sum by(instance, pod) (greptime_mito_write_stall_total{})
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Read Stage OPS per Instance
type: timeseries
description: Read Stage OPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{ stage="total"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: Read Stage P99 per Instance
type: timeseries
description: Read Stage P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
- title: Write Stage P99 per Instance
type: timeseries
description: Write Stage P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
- title: Compaction OPS per Instance
type: timeseries
description: Compaction OPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{pod}}]'
- title: Compaction P99 per Instance by Stage
type: timeseries
description: Compaction latency by stage
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
- title: Compaction P99 per Instance
type: timeseries
description: Compaction P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction'
- title: WAL write size
type: timeseries
description: Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate.
unit: bytes
queries:
- expr: histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p95'
- expr: histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p99'
- expr: sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-throughput'
- title: Cached Bytes per Instance
type: timeseries
description: Cached Bytes per Instance.
unit: decbytes
queries:
- expr: greptime_mito_cache_bytes{}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
- title: Inflight Compaction
type: timeseries
description: Ongoing compaction task count
unit: none
queries:
- expr: greptime_mito_inflight_compaction_count
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: WAL sync duration seconds
type: timeseries
description: Raft engine (local disk) log store sync latency, p99
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Log Store op duration seconds
type: timeseries
description: Write-ahead log operations latency at p99
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99'
- title: Inflight Flush
type: timeseries
description: Ongoing flush task count
unit: none
queries:
- expr: greptime_mito_inflight_flush_count
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]'
- title: OpenDAL
panels:
- title: QPS per Instance
type: timeseries
description: QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Read QPS per Instance
type: timeseries
description: Read QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: Read P99 per Instance
type: timeseries
description: Read P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
- title: Write QPS per Instance
type: timeseries
description: Write QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
- title: Write P99 per Instance
type: timeseries
description: Write P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: List QPS per Instance
type: timeseries
description: List QPS per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="list"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: List P99 per Instance
type: timeseries
description: List P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="list"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
- title: Other Requests per Instance
type: timeseries
description: Other Requests per Instance.
unit: ops
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{operation!~"read|write|list|stat"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Other Request P99 per Instance
type: timeseries
description: Other Request P99 per Instance.
unit: s
queries:
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read|write|list"}[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: Opendal traffic
type: timeseries
description: Total traffic as in bytes by instance and operation
unit: decbytes
queries:
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
- title: OpenDAL errors per Instance
type: timeseries
description: OpenDAL error counts per Instance.
queries:
- expr: sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{ error!="NotFound"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
- title: Metasrv
panels:
- title: Region migration datanode
type: state-timeline
description: Counter of region migration by source and destination
unit: none
queries:
- expr: greptime_meta_region_migration_stat{datanode_type="src"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: from-datanode-{{datanode_id}}
- expr: greptime_meta_region_migration_stat{datanode_type="desc"}
datasource:
type: prometheus
uid: ${metrics}
legendFormat: to-datanode-{{datanode_id}}
- title: Region migration error
type: timeseries
description: Counter of region migration error
unit: none
queries:
- expr: greptime_meta_region_migration_error
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Datanode load
type: timeseries
description: Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads.
unit: none
queries:
- expr: greptime_datanode_load
datasource:
type: prometheus
uid: ${metrics}
legendFormat: __auto
- title: Flownode
panels:
- title: Flow Ingest / Output Rate
type: timeseries
description: Flow Ingest / Output Rate.
queries:
- expr: sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{pod}}]-[{{instance}}]-[{{direction}}]'
- title: Flow Ingest Latency
type: timeseries
description: Flow Ingest Latency.
queries:
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p95'
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
- title: Flow Operation Latency
type: timeseries
description: Flow Operation Latency.
queries:
- expr: histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p95'
- expr: histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p99'
- title: Flow Buffer Size per Instance
type: timeseries
description: Flow Buffer Size per Instance.
queries:
- expr: greptime_flow_input_buf_size
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}]'
- title: Flow Processing Error per Instance
type: timeseries
description: Flow Processing Error per Instance.
queries:
- expr: sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'

File diff suppressed because it is too large

4159
grafana/greptimedb.json Normal file

File diff suppressed because it is too large

View File

@@ -1,54 +0,0 @@
#!/usr/bin/env bash
DASHBOARD_DIR=${1:-grafana/dashboards}
check_dashboard_description() {
for dashboard in $(find $DASHBOARD_DIR -name "*.json"); do
echo "Checking $dashboard description"
# Use jq to check for panels with empty or missing descriptions
invalid_panels=$(cat $dashboard | jq -r '
.panels[]
| select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))')
# Check if any invalid panels were found
if [[ -n "$invalid_panels" ]]; then
echo "Error: The following panels have empty or missing descriptions:"
echo "$invalid_panels"
exit 1
else
echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
fi
done
}
check_dashboards_generation() {
./grafana/scripts/gen-dashboards.sh
if [[ -n "$(git diff --name-only grafana/dashboards)" ]]; then
echo "Error: The dashboards are not generated correctly. You should execute the `make dashboards` command."
exit 1
fi
}
check_datasource() {
for dashboard in $(find $DASHBOARD_DIR -name "*.json"); do
echo "Checking $dashboard datasource"
jq -r '.panels[] | select(.type != "row") | .targets[] | [.datasource.type, .datasource.uid] | @tsv' $dashboard | while read -r type uid; do
# if the datasource is prometheus, check if the uid is ${metrics}
if [[ "$type" == "prometheus" && "$uid" != "\${metrics}" ]]; then
echo "Error: The datasource uid of $dashboard is not valid. It should be \${metrics}, got $uid"
exit 1
fi
# if the datasource is mysql, check if the uid is ${information_schema}
if [[ "$type" == "mysql" && "$uid" != "\${information_schema}" ]]; then
echo "Error: The datasource uid of $dashboard is not valid. It should be \${information_schema}, got $uid"
exit 1
fi
done
done
}
check_dashboards_generation
check_dashboard_description
check_datasource

View File

@@ -1,25 +0,0 @@
#! /usr/bin/env bash
CLUSTER_DASHBOARD_DIR=${1:-grafana/dashboards/cluster}
STANDALONE_DASHBOARD_DIR=${2:-grafana/dashboards/standalone}
DAC_IMAGE=ghcr.io/zyy17/dac:20250423-522bd35
remove_instance_filters() {
# Remove the instance filters for the standalone dashboards.
sed 's/instance=~\\"$datanode\\",//; s/instance=~\\"$datanode\\"//; s/instance=~\\"$frontend\\",//; s/instance=~\\"$frontend\\"//; s/instance=~\\"$metasrv\\",//; s/instance=~\\"$metasrv\\"//; s/instance=~\\"$flownode\\",//; s/instance=~\\"$flownode\\"//;' $CLUSTER_DASHBOARD_DIR/dashboard.json > $STANDALONE_DASHBOARD_DIR/dashboard.json
}
generate_intermediate_dashboards_and_docs() {
docker run -v ${PWD}:/greptimedb --rm ${DAC_IMAGE} \
-i /greptimedb/$CLUSTER_DASHBOARD_DIR/dashboard.json \
-o /greptimedb/$CLUSTER_DASHBOARD_DIR/dashboard.yaml \
-m /greptimedb/$CLUSTER_DASHBOARD_DIR/dashboard.md
docker run -v ${PWD}:/greptimedb --rm ${DAC_IMAGE} \
-i /greptimedb/$STANDALONE_DASHBOARD_DIR/dashboard.json \
-o /greptimedb/$STANDALONE_DASHBOARD_DIR/dashboard.yaml \
-m /greptimedb/$STANDALONE_DASHBOARD_DIR/dashboard.md
}
remove_instance_filters
generate_intermediate_dashboards_and_docs

11
grafana/summary.sh Executable file
View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
echo '| Title | Description | Expressions |
|---|---|---|'
cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels |
map(select(.type == "stat" or .type == "timeseries")) |
.[] | "| \(.title) | \(.description | gsub("\n"; "<br>")) | \(.targets | map(.expr // .rawSql | "`\(.|gsub("\n"; "<br>"))`") | join("<br>")) |"
'
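For reference, a hypothetical invocation of this script follows; the output path is only an example, the script itself just prints the table to stdout.
# Regenerate the panel summary table and save it alongside the dashboard JSON.
./grafana/summary.sh > grafana/summary.md
head -n 5 grafana/summary.md   # preview the generated markdown table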

View File

@@ -1,74 +0,0 @@
# Copyright 2023 Greptime Team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
from multiprocessing import Pool
def find_rust_files(directory):
rust_files = []
for root, _, files in os.walk(directory):
# Skip files with "test" in the path
if "test" in root.lower():
continue
for file in files:
# Skip files with "test" in the filename
if "test" in file.lower():
continue
if file.endswith(".rs"):
rust_files.append(os.path.join(root, file))
return rust_files
def check_file_for_super_import(file_path):
with open(file_path, "r") as file:
lines = file.readlines()
violations = []
for line_number, line in enumerate(lines, 1):
# Check for "use super::" without leading tab
if line.startswith("use super::"):
violations.append((line_number, line.strip()))
if violations:
return file_path, violations
return None
def main():
rust_files = find_rust_files(".")
with Pool() as pool:
results = pool.map(check_file_for_super_import, rust_files)
# Filter out None results
violations = [result for result in results if result]
if violations:
print("Found 'use super::' without leading tab in the following files:")
counter = 1
for file_path, file_violations in violations:
for line_number, line in file_violations:
print(f"{counter:>5} {file_path}:{line_number} - {line}")
counter += 1
raise SystemExit(1)
else:
print("No 'use super::' without leading tab found. All files are compliant.")
if __name__ == "__main__":
main()

View File

@@ -514,7 +514,6 @@ fn query_request_type(request: &QueryRequest) -> &'static str {
Some(Query::Sql(_)) => "query.sql", Some(Query::Sql(_)) => "query.sql",
Some(Query::LogicalPlan(_)) => "query.logical_plan", Some(Query::LogicalPlan(_)) => "query.logical_plan",
Some(Query::PromRangeQuery(_)) => "query.prom_range", Some(Query::PromRangeQuery(_)) => "query.prom_range",
Some(Query::InsertIntoPlan(_)) => "query.insert_into_plan",
None => "query.empty", None => "query.empty",
} }
} }

View File

@@ -49,6 +49,7 @@ pub use table_names::*;
use views::InformationSchemaViews; use views::InformationSchemaViews;
use self::columns::InformationSchemaColumns; use self::columns::InformationSchemaColumns;
use super::{SystemSchemaProviderInner, SystemTable, SystemTableRef};
use crate::error::{Error, Result}; use crate::error::{Error, Result};
use crate::system_schema::information_schema::cluster_info::InformationSchemaClusterInfo; use crate::system_schema::information_schema::cluster_info::InformationSchemaClusterInfo;
use crate::system_schema::information_schema::flows::InformationSchemaFlows; use crate::system_schema::information_schema::flows::InformationSchemaFlows;
@@ -62,9 +63,7 @@ use crate::system_schema::information_schema::table_constraints::InformationSche
use crate::system_schema::information_schema::tables::InformationSchemaTables; use crate::system_schema::information_schema::tables::InformationSchemaTables;
use crate::system_schema::memory_table::MemoryTable; use crate::system_schema::memory_table::MemoryTable;
pub(crate) use crate::system_schema::predicate::Predicates; pub(crate) use crate::system_schema::predicate::Predicates;
use crate::system_schema::{ use crate::system_schema::SystemSchemaProvider;
SystemSchemaProvider, SystemSchemaProviderInner, SystemTable, SystemTableRef,
};
use crate::CatalogManager; use crate::CatalogManager;
lazy_static! { lazy_static! {

View File

@@ -36,8 +36,9 @@ use datatypes::vectors::{
use snafu::ResultExt; use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::CLUSTER_INFO;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result}; use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::system_schema::information_schema::{InformationTable, Predicates, CLUSTER_INFO}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils; use crate::system_schema::utils;
use crate::CatalogManager; use crate::CatalogManager;

View File

@@ -38,11 +38,11 @@ use snafu::{OptionExt, ResultExt};
use sql::statements; use sql::statements;
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, COLUMNS};
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::information_schema::Predicates; use crate::information_schema::Predicates;
use crate::system_schema::information_schema::{InformationTable, COLUMNS};
use crate::CatalogManager; use crate::CatalogManager;
#[derive(Debug)] #[derive(Debug)]

View File

@@ -18,7 +18,7 @@ use common_catalog::consts::{METRIC_ENGINE, MITO_ENGINE};
use datatypes::schema::{Schema, SchemaRef}; use datatypes::schema::{Schema, SchemaRef};
use datatypes::vectors::{Int64Vector, StringVector, VectorRef}; use datatypes::vectors::{Int64Vector, StringVector, VectorRef};
use crate::system_schema::information_schema::table_names::*; use super::table_names::*;
use crate::system_schema::utils::tables::{ use crate::system_schema::utils::tables::{
bigint_column, string_column, string_columns, timestamp_micro_column, bigint_column, string_column, string_columns, timestamp_micro_column,
}; };

View File

@@ -24,17 +24,18 @@ use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatch
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream; use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream; use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, MutableVector, ScalarVectorBuilder, VectorRef}; use datatypes::prelude::{ConcreteDataType, MutableVector, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, FulltextBackend, Schema, SchemaRef}; use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value; use datatypes::value::Value;
use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, UInt32VectorBuilder}; use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, UInt32VectorBuilder};
use futures_util::TryStreamExt; use futures_util::TryStreamExt;
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::KEY_COLUMN_USAGE;
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::system_schema::information_schema::{InformationTable, Predicates, KEY_COLUMN_USAGE}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::CatalogManager; use crate::CatalogManager;
pub const CONSTRAINT_SCHEMA: &str = "constraint_schema"; pub const CONSTRAINT_SCHEMA: &str = "constraint_schema";
@@ -47,38 +48,20 @@ pub const TABLE_SCHEMA: &str = "table_schema";
pub const TABLE_NAME: &str = "table_name"; pub const TABLE_NAME: &str = "table_name";
pub const COLUMN_NAME: &str = "column_name"; pub const COLUMN_NAME: &str = "column_name";
pub const ORDINAL_POSITION: &str = "ordinal_position"; pub const ORDINAL_POSITION: &str = "ordinal_position";
/// The type of the index.
pub const GREPTIME_INDEX_TYPE: &str = "greptime_index_type";
const INIT_CAPACITY: usize = 42; const INIT_CAPACITY: usize = 42;
/// Time index constraint name
pub(crate) const CONSTRAINT_NAME_TIME_INDEX: &str = "TIME INDEX";
/// Primary key constraint name /// Primary key constraint name
pub(crate) const CONSTRAINT_NAME_PRI: &str = "PRIMARY"; pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY";
/// Primary key index type /// Time index constraint name
pub(crate) const INDEX_TYPE_PRI: &str = "greptime-primary-key-v1"; pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX";
/// Inverted index constraint name /// Inverted index constraint name
pub(crate) const CONSTRAINT_NAME_INVERTED_INDEX: &str = "INVERTED INDEX"; pub(crate) const INVERTED_INDEX_CONSTRAINT_NAME: &str = "INVERTED INDEX";
/// Inverted index type
pub(crate) const INDEX_TYPE_INVERTED_INDEX: &str = "greptime-inverted-index-v1";
/// Fulltext index constraint name /// Fulltext index constraint name
pub(crate) const CONSTRAINT_NAME_FULLTEXT_INDEX: &str = "FULLTEXT INDEX"; pub(crate) const FULLTEXT_INDEX_CONSTRAINT_NAME: &str = "FULLTEXT INDEX";
/// Fulltext index v1 type
pub(crate) const INDEX_TYPE_FULLTEXT_TANTIVY: &str = "greptime-fulltext-index-v1";
/// Fulltext index bloom type
pub(crate) const INDEX_TYPE_FULLTEXT_BLOOM: &str = "greptime-fulltext-index-bloom";
/// Skipping index constraint name /// Skipping index constraint name
pub(crate) const CONSTRAINT_NAME_SKIPPING_INDEX: &str = "SKIPPING INDEX"; pub(crate) const SKIPPING_INDEX_CONSTRAINT_NAME: &str = "SKIPPING INDEX";
/// Skipping index type
pub(crate) const INDEX_TYPE_SKIPPING_INDEX: &str = "greptime-bloom-filter-v1";
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`. /// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
///
/// Provides an extra column `greptime_index_type` for the index type of the key column.
#[derive(Debug)] #[derive(Debug)]
pub(super) struct InformationSchemaKeyColumnUsage { pub(super) struct InformationSchemaKeyColumnUsage {
schema: SchemaRef, schema: SchemaRef,
@@ -138,11 +121,6 @@ impl InformationSchemaKeyColumnUsage {
ConcreteDataType::string_datatype(), ConcreteDataType::string_datatype(),
true, true,
), ),
ColumnSchema::new(
GREPTIME_INDEX_TYPE,
ConcreteDataType::string_datatype(),
true,
),
])) ]))
} }
@@ -207,7 +185,6 @@ struct InformationSchemaKeyColumnUsageBuilder {
column_name: StringVectorBuilder, column_name: StringVectorBuilder,
ordinal_position: UInt32VectorBuilder, ordinal_position: UInt32VectorBuilder,
position_in_unique_constraint: UInt32VectorBuilder, position_in_unique_constraint: UInt32VectorBuilder,
greptime_index_type: StringVectorBuilder,
} }
impl InformationSchemaKeyColumnUsageBuilder { impl InformationSchemaKeyColumnUsageBuilder {
@@ -230,7 +207,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
column_name: StringVectorBuilder::with_capacity(INIT_CAPACITY), column_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
ordinal_position: UInt32VectorBuilder::with_capacity(INIT_CAPACITY), ordinal_position: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
position_in_unique_constraint: UInt32VectorBuilder::with_capacity(INIT_CAPACITY), position_in_unique_constraint: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
greptime_index_type: StringVectorBuilder::with_capacity(INIT_CAPACITY),
} }
} }
@@ -254,47 +230,34 @@ impl InformationSchemaKeyColumnUsageBuilder {
for (idx, column) in schema.column_schemas().iter().enumerate() { for (idx, column) in schema.column_schemas().iter().enumerate() {
let mut constraints = vec![]; let mut constraints = vec![];
let mut greptime_index_type = vec![];
if column.is_time_index() { if column.is_time_index() {
self.add_key_column_usage( self.add_key_column_usage(
&predicates, &predicates,
&schema_name, &schema_name,
CONSTRAINT_NAME_TIME_INDEX, TIME_INDEX_CONSTRAINT_NAME,
&catalog_name, &catalog_name,
&schema_name, &schema_name,
table_name, table_name,
&column.name, &column.name,
1, //always 1 for time index 1, //always 1 for time index
"",
); );
} }
// TODO(dimbtp): foreign key constraint not supported yet // TODO(dimbtp): foreign key constraint not supported yet
if keys.contains(&idx) { if keys.contains(&idx) {
constraints.push(CONSTRAINT_NAME_PRI); constraints.push(PRI_CONSTRAINT_NAME);
greptime_index_type.push(INDEX_TYPE_PRI);
} }
if column.is_inverted_indexed() { if column.is_inverted_indexed() {
constraints.push(CONSTRAINT_NAME_INVERTED_INDEX); constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);
greptime_index_type.push(INDEX_TYPE_INVERTED_INDEX);
} }
if let Ok(Some(options)) = column.fulltext_options() { if column.is_fulltext_indexed() {
if options.enable { constraints.push(FULLTEXT_INDEX_CONSTRAINT_NAME);
constraints.push(CONSTRAINT_NAME_FULLTEXT_INDEX);
let index_type = match options.backend {
FulltextBackend::Bloom => INDEX_TYPE_FULLTEXT_BLOOM,
FulltextBackend::Tantivy => INDEX_TYPE_FULLTEXT_TANTIVY,
};
greptime_index_type.push(index_type);
}
} }
if column.is_skipping_indexed() { if column.is_skipping_indexed() {
constraints.push(CONSTRAINT_NAME_SKIPPING_INDEX); constraints.push(SKIPPING_INDEX_CONSTRAINT_NAME);
greptime_index_type.push(INDEX_TYPE_SKIPPING_INDEX);
} }
if !constraints.is_empty() { if !constraints.is_empty() {
let aggregated_constraints = constraints.join(", "); let aggregated_constraints = constraints.join(", ");
let aggregated_index_types = greptime_index_type.join(", ");
self.add_key_column_usage( self.add_key_column_usage(
&predicates, &predicates,
&schema_name, &schema_name,
@@ -304,7 +267,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
table_name, table_name,
&column.name, &column.name,
idx as u32 + 1, idx as u32 + 1,
&aggregated_index_types,
); );
} }
} }
@@ -327,7 +289,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
table_name: &str, table_name: &str,
column_name: &str, column_name: &str,
ordinal_position: u32, ordinal_position: u32,
index_types: &str,
) { ) {
let row = [ let row = [
(CONSTRAINT_SCHEMA, &Value::from(constraint_schema)), (CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
@@ -337,7 +298,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
(TABLE_NAME, &Value::from(table_name)), (TABLE_NAME, &Value::from(table_name)),
(COLUMN_NAME, &Value::from(column_name)), (COLUMN_NAME, &Value::from(column_name)),
(ORDINAL_POSITION, &Value::from(ordinal_position)), (ORDINAL_POSITION, &Value::from(ordinal_position)),
(GREPTIME_INDEX_TYPE, &Value::from(index_types)),
]; ];
if !predicates.eval(&row) { if !predicates.eval(&row) {
@@ -354,7 +314,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
self.column_name.push(Some(column_name)); self.column_name.push(Some(column_name));
self.ordinal_position.push(Some(ordinal_position)); self.ordinal_position.push(Some(ordinal_position));
self.position_in_unique_constraint.push(None); self.position_in_unique_constraint.push(None);
self.greptime_index_type.push(Some(index_types));
} }
fn finish(&mut self) -> Result<RecordBatch> { fn finish(&mut self) -> Result<RecordBatch> {
@@ -378,7 +337,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
null_string_vector.clone(), null_string_vector.clone(),
null_string_vector.clone(), null_string_vector.clone(),
null_string_vector, null_string_vector,
Arc::new(self.greptime_index_type.finish()),
]; ];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu) RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
} }

View File

@@ -39,12 +39,13 @@ use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use table::metadata::{TableInfo, TableType}; use table::metadata::{TableInfo, TableType};
use super::PARTITIONS;
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, PartitionManagerNotFoundSnafu, CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, PartitionManagerNotFoundSnafu,
Result, UpgradeWeakCatalogManagerRefSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::kvbackend::KvBackendCatalogManager; use crate::kvbackend::KvBackendCatalogManager;
use crate::system_schema::information_schema::{InformationTable, Predicates, PARTITIONS}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::CatalogManager; use crate::CatalogManager;
const TABLE_CATALOG: &str = "table_catalog"; const TABLE_CATALOG: &str = "table_catalog";

View File

@@ -33,8 +33,9 @@ use datatypes::vectors::{StringVectorBuilder, TimestampMillisecondVectorBuilder}
use snafu::ResultExt; use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::PROCEDURE_INFO;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result}; use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::system_schema::information_schema::{InformationTable, Predicates, PROCEDURE_INFO}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils; use crate::system_schema::utils;
use crate::CatalogManager; use crate::CatalogManager;

View File

@@ -35,12 +35,13 @@ use snafu::{OptionExt, ResultExt};
use store_api::storage::{RegionId, ScanRequest, TableId}; use store_api::storage::{RegionId, ScanRequest, TableId};
use table::metadata::TableType; use table::metadata::TableType;
use super::REGION_PEERS;
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, FindRegionRoutesSnafu, InternalSnafu, Result, CreateRecordBatchSnafu, FindRegionRoutesSnafu, InternalSnafu, Result,
UpgradeWeakCatalogManagerRefSnafu, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::kvbackend::KvBackendCatalogManager; use crate::kvbackend::KvBackendCatalogManager;
use crate::system_schema::information_schema::{InformationTable, Predicates, REGION_PEERS}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::CatalogManager; use crate::CatalogManager;
pub const TABLE_CATALOG: &str = "table_catalog"; pub const TABLE_CATALOG: &str = "table_catalog";

View File

@@ -30,9 +30,9 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, UInt64VectorB
use snafu::ResultExt; use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, REGION_STATISTICS};
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result}; use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::information_schema::Predicates; use crate::information_schema::Predicates;
use crate::system_schema::information_schema::{InformationTable, REGION_STATISTICS};
use crate::system_schema::utils; use crate::system_schema::utils;
use crate::CatalogManager; use crate::CatalogManager;

View File

@@ -35,8 +35,8 @@ use itertools::Itertools;
use snafu::ResultExt; use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, RUNTIME_METRICS};
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result}; use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::system_schema::information_schema::{InformationTable, RUNTIME_METRICS};
#[derive(Debug)] #[derive(Debug)]
pub(super) struct InformationSchemaMetrics { pub(super) struct InformationSchemaMetrics {

View File

@@ -31,11 +31,12 @@ use datatypes::vectors::StringVectorBuilder;
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::SCHEMATA;
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, TableMetadataManagerSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, TableMetadataManagerSnafu,
UpgradeWeakCatalogManagerRefSnafu, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::system_schema::information_schema::{InformationTable, Predicates, SCHEMATA}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils; use crate::system_schema::utils;
use crate::CatalogManager; use crate::CatalogManager;

View File

@@ -32,14 +32,14 @@ use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, TABLE_CONSTRAINTS};
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::information_schema::key_column_usage::{ use crate::information_schema::key_column_usage::{
CONSTRAINT_NAME_PRI, CONSTRAINT_NAME_TIME_INDEX, PRI_CONSTRAINT_NAME, TIME_INDEX_CONSTRAINT_NAME,
}; };
use crate::information_schema::Predicates; use crate::information_schema::Predicates;
use crate::system_schema::information_schema::{InformationTable, TABLE_CONSTRAINTS};
use crate::CatalogManager; use crate::CatalogManager;
/// The `TABLE_CONSTRAINTS` table describes which tables have constraints. /// The `TABLE_CONSTRAINTS` table describes which tables have constraints.
@@ -188,7 +188,7 @@ impl InformationSchemaTableConstraintsBuilder {
self.add_table_constraint( self.add_table_constraint(
&predicates, &predicates,
&schema_name, &schema_name,
CONSTRAINT_NAME_TIME_INDEX, TIME_INDEX_CONSTRAINT_NAME,
&schema_name, &schema_name,
&table.table_info().name, &table.table_info().name,
TIME_INDEX_CONSTRAINT_TYPE, TIME_INDEX_CONSTRAINT_TYPE,
@@ -199,7 +199,7 @@ impl InformationSchemaTableConstraintsBuilder {
self.add_table_constraint( self.add_table_constraint(
&predicates, &predicates,
&schema_name, &schema_name,
CONSTRAINT_NAME_PRI, PRI_CONSTRAINT_NAME,
&schema_name, &schema_name,
&table.table_info().name, &table.table_info().name,
PRI_KEY_CONSTRAINT_TYPE, PRI_KEY_CONSTRAINT_TYPE,

View File

@@ -38,10 +38,11 @@ use snafu::{OptionExt, ResultExt};
use store_api::storage::{RegionId, ScanRequest, TableId}; use store_api::storage::{RegionId, ScanRequest, TableId};
use table::metadata::{TableInfo, TableType}; use table::metadata::{TableInfo, TableType};
use super::TABLES;
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::system_schema::information_schema::{InformationTable, Predicates, TABLES}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils; use crate::system_schema::utils;
use crate::CatalogManager; use crate::CatalogManager;

View File

@@ -32,12 +32,13 @@ use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use table::metadata::TableType; use table::metadata::TableType;
use super::VIEWS;
use crate::error::{ use crate::error::{
CastManagerSnafu, CreateRecordBatchSnafu, GetViewCacheSnafu, InternalSnafu, Result, CastManagerSnafu, CreateRecordBatchSnafu, GetViewCacheSnafu, InternalSnafu, Result,
UpgradeWeakCatalogManagerRefSnafu, ViewInfoNotFoundSnafu, UpgradeWeakCatalogManagerRefSnafu, ViewInfoNotFoundSnafu,
}; };
use crate::kvbackend::KvBackendCatalogManager; use crate::kvbackend::KvBackendCatalogManager;
use crate::system_schema::information_schema::{InformationTable, Predicates, VIEWS}; use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::CatalogManager; use crate::CatalogManager;
const INIT_CAPACITY: usize = 42; const INIT_CAPACITY: usize = 42;

View File

@@ -29,8 +29,8 @@ use datatypes::vectors::VectorRef;
use snafu::ResultExt; use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
use super::SystemTable;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result}; use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::system_schema::SystemTable;
/// A memory table with specified schema and columns. /// A memory table with specified schema and columns.
#[derive(Debug)] #[derive(Debug)]

View File

@@ -34,9 +34,9 @@ use table::TableRef;
pub use table_names::*; pub use table_names::*;
use self::pg_namespace::oid_map::{PGNamespaceOidMap, PGNamespaceOidMapRef}; use self::pg_namespace::oid_map::{PGNamespaceOidMap, PGNamespaceOidMapRef};
use crate::system_schema::memory_table::MemoryTable; use super::memory_table::MemoryTable;
use crate::system_schema::utils::tables::u32_column; use super::utils::tables::u32_column;
use crate::system_schema::{SystemSchemaProvider, SystemSchemaProviderInner, SystemTableRef}; use super::{SystemSchemaProvider, SystemSchemaProviderInner, SystemTableRef};
use crate::CatalogManager; use crate::CatalogManager;
lazy_static! { lazy_static! {

View File

@@ -17,9 +17,9 @@ use std::sync::Arc;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::{Int16Vector, StringVector, UInt32Vector, VectorRef}; use datatypes::vectors::{Int16Vector, StringVector, UInt32Vector, VectorRef};
use super::oid_column;
use super::table_names::PG_TYPE;
use crate::memory_table_cols; use crate::memory_table_cols;
use crate::system_schema::pg_catalog::oid_column;
use crate::system_schema::pg_catalog::table_names::PG_TYPE;
use crate::system_schema::utils::tables::{i16_column, string_column}; use crate::system_schema::utils::tables::{i16_column, string_column};
fn pg_type_schema_columns() -> (Vec<ColumnSchema>, Vec<VectorRef>) { fn pg_type_schema_columns() -> (Vec<ColumnSchema>, Vec<VectorRef>) {


@@ -32,12 +32,12 @@ use snafu::{OptionExt, ResultExt};
use store_api::storage::ScanRequest; use store_api::storage::ScanRequest;
use table::metadata::TableType; use table::metadata::TableType;
use super::pg_namespace::oid_map::PGNamespaceOidMapRef;
use super::{query_ctx, OID_COLUMN_NAME, PG_CLASS};
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::information_schema::Predicates; use crate::information_schema::Predicates;
use crate::system_schema::pg_catalog::pg_namespace::oid_map::PGNamespaceOidMapRef;
use crate::system_schema::pg_catalog::{query_ctx, OID_COLUMN_NAME, PG_CLASS};
use crate::system_schema::utils::tables::{string_column, u32_column}; use crate::system_schema::utils::tables::{string_column, u32_column};
use crate::system_schema::SystemTable; use crate::system_schema::SystemTable;
use crate::CatalogManager; use crate::CatalogManager;


@@ -29,12 +29,12 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, VectorRef};
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use store_api::storage::ScanRequest; use store_api::storage::ScanRequest;
use super::pg_namespace::oid_map::PGNamespaceOidMapRef;
use super::{query_ctx, OID_COLUMN_NAME, PG_DATABASE};
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::information_schema::Predicates; use crate::information_schema::Predicates;
use crate::system_schema::pg_catalog::pg_namespace::oid_map::PGNamespaceOidMapRef;
use crate::system_schema::pg_catalog::{query_ctx, OID_COLUMN_NAME, PG_DATABASE};
use crate::system_schema::utils::tables::{string_column, u32_column}; use crate::system_schema::utils::tables::{string_column, u32_column};
use crate::system_schema::SystemTable; use crate::system_schema::SystemTable;
use crate::CatalogManager; use crate::CatalogManager;


@@ -35,13 +35,11 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, VectorRef};
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use store_api::storage::ScanRequest; use store_api::storage::ScanRequest;
use super::{query_ctx, PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE};
use crate::error::{ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
}; };
use crate::information_schema::Predicates; use crate::information_schema::Predicates;
use crate::system_schema::pg_catalog::{
query_ctx, PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE,
};
use crate::system_schema::utils::tables::{string_column, u32_column}; use crate::system_schema::utils::tables::{string_column, u32_column};
use crate::system_schema::SystemTable; use crate::system_schema::SystemTable;
use crate::CatalogManager; use crate::CatalogManager;


@@ -437,7 +437,10 @@ mod tests {
} }
fn column(name: &str) -> Expr { fn column(name: &str) -> Expr {
Expr::Column(Column::from_name(name)) Expr::Column(Column {
relation: None,
name: name.to_string(),
})
} }
fn string_literal(v: &str) -> Expr { fn string_literal(v: &str) -> Expr {


@@ -27,7 +27,7 @@ use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt}; use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableType; use table::metadata::TableType;
use table::table::adapter::DfTableProviderAdapter; use table::table::adapter::DfTableProviderAdapter;
pub mod dummy_catalog; mod dummy_catalog;
use dummy_catalog::DummyCatalogList; use dummy_catalog::DummyCatalogList;
use table::TableRef; use table::TableRef;


@@ -17,6 +17,7 @@ use std::any::Any;
use common_error::ext::{BoxedError, ErrorExt}; use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode; use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug; use common_macro::stack_trace_debug;
use rustyline::error::ReadlineError;
use snafu::{Location, Snafu}; use snafu::{Location, Snafu};
#[derive(Snafu)] #[derive(Snafu)]
@@ -104,6 +105,52 @@ pub enum Error {
#[snafu(display("Invalid REPL command: {reason}"))] #[snafu(display("Invalid REPL command: {reason}"))]
InvalidReplCommand { reason: String }, InvalidReplCommand { reason: String },
#[snafu(display("Cannot create REPL"))]
ReplCreation {
#[snafu(source)]
error: ReadlineError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Error reading command"))]
Readline {
#[snafu(source)]
error: ReadlineError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to request database, sql: {sql}"))]
RequestDatabase {
sql: String,
#[snafu(source)]
source: client::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to collect RecordBatches"))]
CollectRecordBatches {
#[snafu(implicit)]
location: Location,
source: common_recordbatch::error::Error,
},
#[snafu(display("Failed to pretty print Recordbatches"))]
PrettyPrintRecordBatches {
#[snafu(implicit)]
location: Location,
source: common_recordbatch::error::Error,
},
#[snafu(display("Failed to start Meta client"))]
StartMetaClient {
#[snafu(implicit)]
location: Location,
source: meta_client::error::Error,
},
#[snafu(display("Failed to parse SQL: {}", sql))] #[snafu(display("Failed to parse SQL: {}", sql))]
ParseSql { ParseSql {
sql: String, sql: String,
@@ -119,6 +166,13 @@ pub enum Error {
source: query::error::Error, source: query::error::Error,
}, },
#[snafu(display("Failed to encode logical plan in substrait"))]
SubstraitEncodeLogicalPlan {
#[snafu(implicit)]
location: Location,
source: substrait::error::Error,
},
#[snafu(display("Failed to load layered config"))] #[snafu(display("Failed to load layered config"))]
LoadLayeredConfig { LoadLayeredConfig {
#[snafu(source(from(common_config::error::Error, Box::new)))] #[snafu(source(from(common_config::error::Error, Box::new)))]
@@ -264,10 +318,17 @@ impl ErrorExt for Error {
Error::StartProcedureManager { source, .. } Error::StartProcedureManager { source, .. }
| Error::StopProcedureManager { source, .. } => source.status_code(), | Error::StopProcedureManager { source, .. } => source.status_code(),
Error::StartWalOptionsAllocator { source, .. } => source.status_code(), Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
Error::HttpQuerySql { .. } => StatusCode::Internal, Error::ReplCreation { .. } | Error::Readline { .. } | Error::HttpQuerySql { .. } => {
StatusCode::Internal
}
Error::RequestDatabase { source, .. } => source.status_code(),
Error::CollectRecordBatches { source, .. }
| Error::PrettyPrintRecordBatches { source, .. } => source.status_code(),
Error::StartMetaClient { source, .. } => source.status_code(),
Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => { Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => {
source.status_code() source.status_code()
} }
Error::SubstraitEncodeLogicalPlan { source, .. } => source.status_code(),
Error::SerdeJson { .. } Error::SerdeJson { .. }
| Error::FileIo { .. } | Error::FileIo { .. }


@@ -23,12 +23,15 @@ mod helper;
// Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373 // Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373
mod database; mod database;
mod import; mod import;
#[allow(unused)]
mod repl;
use async_trait::async_trait; use async_trait::async_trait;
use clap::Parser; use clap::Parser;
use common_error::ext::BoxedError; use common_error::ext::BoxedError;
pub use database::DatabaseClient; pub use database::DatabaseClient;
use error::Result; use error::Result;
pub use repl::Repl;
pub use crate::bench::BenchTableMetadataCommand; pub use crate::bench::BenchTableMetadataCommand;
pub use crate::export::ExportCommand; pub use crate::export::ExportCommand;

src/cli/src/repl.rs (new file, 299 lines)

@@ -0,0 +1,299 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Instant;
use cache::{
build_fundamental_cache_registry, with_default_composite_cache_registry, TABLE_CACHE_NAME,
TABLE_ROUTE_CACHE_NAME,
};
use catalog::information_extension::DistributedInformationExtension;
use catalog::kvbackend::{
CachedKvBackend, CachedKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend,
};
use client::{Client, Database, OutputData, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_base::Plugins;
use common_config::Mode;
use common_error::ext::ErrorExt;
use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
use common_meta::kv_backend::KvBackendRef;
use common_query::Output;
use common_recordbatch::RecordBatches;
use common_telemetry::debug;
use either::Either;
use meta_client::client::{ClusterKvBackend, MetaClientBuilder};
use query::datafusion::DatafusionQueryEngine;
use query::parser::QueryLanguageParser;
use query::query_engine::{DefaultSerializer, QueryEngineState};
use query::QueryEngine;
use rustyline::error::ReadlineError;
use rustyline::Editor;
use session::context::QueryContext;
use snafu::{OptionExt, ResultExt};
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use crate::cmd::ReplCommand;
use crate::error::{
CollectRecordBatchesSnafu, ParseSqlSnafu, PlanStatementSnafu, PrettyPrintRecordBatchesSnafu,
ReadlineSnafu, ReplCreationSnafu, RequestDatabaseSnafu, Result, StartMetaClientSnafu,
SubstraitEncodeLogicalPlanSnafu,
};
use crate::helper::RustylineHelper;
use crate::{error, AttachCommand};
/// Captures the state of the repl, gathers commands and executes them one by one
pub struct Repl {
/// Rustyline editor for interacting with user on command line
rl: Editor<RustylineHelper>,
/// Current prompt
prompt: String,
/// Client for interacting with GreptimeDB
database: Database,
query_engine: Option<DatafusionQueryEngine>,
}
#[allow(clippy::print_stdout)]
impl Repl {
fn print_help(&self) {
println!("{}", ReplCommand::help())
}
pub(crate) async fn try_new(cmd: &AttachCommand) -> Result<Self> {
let mut rl = Editor::new().context(ReplCreationSnafu)?;
if !cmd.disable_helper {
rl.set_helper(Some(RustylineHelper::default()));
let history_file = history_file();
if let Err(e) = rl.load_history(&history_file) {
debug!(
"failed to load history file on {}, error: {e}",
history_file.display()
);
}
}
let client = Client::with_urls([&cmd.grpc_addr]);
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
let query_engine = if let Some(meta_addr) = &cmd.meta_addr {
create_query_engine(meta_addr).await.map(Some)?
} else {
None
};
Ok(Self {
rl,
prompt: "> ".to_string(),
database,
query_engine,
})
}
/// Parse the next command
fn next_command(&mut self) -> Result<ReplCommand> {
match self.rl.readline(&self.prompt) {
Ok(ref line) => {
let request = line.trim();
let _ = self.rl.add_history_entry(request.to_string());
request.try_into()
}
Err(ReadlineError::Eof) | Err(ReadlineError::Interrupted) => Ok(ReplCommand::Exit),
// Some sort of real underlying error
Err(e) => Err(e).context(ReadlineSnafu),
}
}
/// Read Evaluate Print Loop (interactive command line) for GreptimeDB
///
/// Inspired / based on repl.rs from InfluxDB IOX
pub(crate) async fn run(&mut self) -> Result<()> {
println!("Ready for commands. (Hint: try 'help')");
loop {
match self.next_command()? {
ReplCommand::Help => {
self.print_help();
}
ReplCommand::UseDatabase { db_name } => {
if self.execute_sql(format!("USE {db_name}")).await {
println!("Using {db_name}");
self.database.set_schema(&db_name);
self.prompt = format!("[{db_name}] > ");
}
}
ReplCommand::Sql { sql } => {
let _ = self.execute_sql(sql).await;
}
ReplCommand::Exit => {
return Ok(());
}
}
}
}
async fn execute_sql(&self, sql: String) -> bool {
self.do_execute_sql(sql)
.await
.map_err(|e| {
let status_code = e.status_code();
let root_cause = e.output_msg();
println!("Error: {}({status_code}), {root_cause}", status_code as u32)
})
.is_ok()
}
async fn do_execute_sql(&self, sql: String) -> Result<()> {
let start = Instant::now();
let output = if let Some(query_engine) = &self.query_engine {
let query_ctx = Arc::new(QueryContext::with(
self.database.catalog(),
self.database.schema(),
));
let stmt = QueryLanguageParser::parse_sql(&sql, &query_ctx)
.with_context(|_| ParseSqlSnafu { sql: sql.clone() })?;
let plan = query_engine
.planner()
.plan(&stmt, query_ctx.clone())
.await
.context(PlanStatementSnafu)?;
let plan = query_engine
.optimize(&query_engine.engine_context(query_ctx), &plan)
.context(PlanStatementSnafu)?;
let plan = DFLogicalSubstraitConvertor {}
.encode(&plan, DefaultSerializer)
.context(SubstraitEncodeLogicalPlanSnafu)?;
self.database.logical_plan(plan.to_vec()).await
} else {
self.database.sql(&sql).await
}
.context(RequestDatabaseSnafu { sql: &sql })?;
let either = match output.data {
OutputData::Stream(s) => {
let x = RecordBatches::try_collect(s)
.await
.context(CollectRecordBatchesSnafu)?;
Either::Left(x)
}
OutputData::RecordBatches(x) => Either::Left(x),
OutputData::AffectedRows(rows) => Either::Right(rows),
};
let end = Instant::now();
match either {
Either::Left(recordbatches) => {
let total_rows: usize = recordbatches.iter().map(|x| x.num_rows()).sum();
if total_rows > 0 {
println!(
"{}",
recordbatches
.pretty_print()
.context(PrettyPrintRecordBatchesSnafu)?
);
}
println!("Total Rows: {total_rows}")
}
Either::Right(rows) => println!("Affected Rows: {rows}"),
};
println!("Cost {} ms", (end - start).as_millis());
Ok(())
}
}
impl Drop for Repl {
fn drop(&mut self) {
if self.rl.helper().is_some() {
let history_file = history_file();
if let Err(e) = self.rl.save_history(&history_file) {
debug!(
"failed to save history file on {}, error: {e}",
history_file.display()
);
}
}
}
}
/// Return the location of the history file (defaults to $HOME/".greptimedb_cli_history")
fn history_file() -> PathBuf {
let mut buf = match std::env::var("HOME") {
Ok(home) => PathBuf::from(home),
Err(_) => PathBuf::new(),
};
buf.push(".greptimedb_cli_history");
buf
}
async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
let mut meta_client = MetaClientBuilder::default().enable_store().build();
meta_client
.start([meta_addr])
.await
.context(StartMetaClientSnafu)?;
let meta_client = Arc::new(meta_client);
let cached_meta_backend = Arc::new(
CachedKvBackendBuilder::new(Arc::new(MetaKvBackend::new(meta_client.clone()))).build(),
);
let layered_cache_builder = LayeredCacheRegistryBuilder::default().add_cache_registry(
CacheRegistryBuilder::default()
.add_cache(cached_meta_backend.clone())
.build(),
);
let fundamental_cache_registry =
build_fundamental_cache_registry(Arc::new(MetaKvBackend::new(meta_client.clone())));
let layered_cache_registry = Arc::new(
with_default_composite_cache_registry(
layered_cache_builder.add_cache_registry(fundamental_cache_registry),
)
.context(error::BuildCacheRegistrySnafu)?
.build(),
);
let information_extension = Arc::new(DistributedInformationExtension::new(meta_client.clone()));
let catalog_manager = KvBackendCatalogManager::new(
information_extension,
cached_meta_backend.clone(),
layered_cache_registry,
None,
);
let plugins: Plugins = Default::default();
let state = Arc::new(QueryEngineState::new(
catalog_manager,
None,
None,
None,
None,
false,
plugins.clone(),
));
Ok(DatafusionQueryEngine::new(state, plugins))
}
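
For orientation, a minimal sketch (not part of the diff) of how this REPL is driven end to end; `Repl::try_new` and `Repl::run` are the methods added above, while the `attach` function name and the error plumbing are assumptions:

    // Hypothetical entry point; `AttachCommand` carries `grpc_addr`,
    // `meta_addr` and `disable_helper`, as read by `Repl::try_new` above.
    pub async fn attach(cmd: AttachCommand) -> Result<()> {
        let mut repl = Repl::try_new(&cmd).await?;
        // Blocks in the read-evaluate-print loop until the user exits.
        repl.run().await
    }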


@@ -16,7 +16,6 @@ arc-swap = "1.6"
arrow-flight.workspace = true arrow-flight.workspace = true
async-stream.workspace = true async-stream.workspace = true
async-trait.workspace = true async-trait.workspace = true
base64.workspace = true
common-catalog.workspace = true common-catalog.workspace = true
common-error.workspace = true common-error.workspace = true
common-grpc.workspace = true common-grpc.workspace = true
@@ -26,7 +25,6 @@ common-query.workspace = true
common-recordbatch.workspace = true common-recordbatch.workspace = true
common-telemetry.workspace = true common-telemetry.workspace = true
enum_dispatch = "0.3" enum_dispatch = "0.3"
futures.workspace = true
futures-util.workspace = true futures-util.workspace = true
lazy_static.workspace = true lazy_static.workspace = true
moka = { workspace = true, features = ["future"] } moka = { workspace = true, features = ["future"] }


@@ -12,49 +12,36 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use std::pin::Pin;
use std::str::FromStr;
use api::v1::auth_header::AuthScheme; use api::v1::auth_header::AuthScheme;
use api::v1::ddl_request::Expr as DdlExpr; use api::v1::ddl_request::Expr as DdlExpr;
use api::v1::greptime_database_client::GreptimeDatabaseClient; use api::v1::greptime_database_client::GreptimeDatabaseClient;
use api::v1::greptime_request::Request; use api::v1::greptime_request::Request;
use api::v1::query_request::Query; use api::v1::query_request::Query;
use api::v1::{ use api::v1::{
AlterTableExpr, AuthHeader, Basic, CreateTableExpr, DdlRequest, GreptimeRequest, AlterTableExpr, AuthHeader, CreateTableExpr, DdlRequest, GreptimeRequest, InsertRequests,
InsertRequests, QueryRequest, RequestHeader, QueryRequest, RequestHeader,
}; };
use arrow_flight::{FlightData, Ticket}; use arrow_flight::Ticket;
use async_stream::stream; use async_stream::stream;
use base64::prelude::BASE64_STANDARD;
use base64::Engine;
use common_catalog::build_db_string;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::ext::{BoxedError, ErrorExt}; use common_error::ext::{BoxedError, ErrorExt};
use common_grpc::flight::do_put::DoPutResponse;
use common_grpc::flight::{FlightDecoder, FlightMessage}; use common_grpc::flight::{FlightDecoder, FlightMessage};
use common_query::Output; use common_query::Output;
use common_recordbatch::error::ExternalSnafu; use common_recordbatch::error::ExternalSnafu;
use common_recordbatch::RecordBatchStreamWrapper; use common_recordbatch::RecordBatchStreamWrapper;
use common_telemetry::error; use common_telemetry::error;
use common_telemetry::tracing_context::W3cTrace; use common_telemetry::tracing_context::W3cTrace;
use futures::future; use futures_util::StreamExt;
use futures_util::{Stream, StreamExt, TryStreamExt};
use prost::Message; use prost::Message;
use snafu::{ensure, ResultExt}; use snafu::{ensure, ResultExt};
use tonic::metadata::{AsciiMetadataKey, MetadataValue}; use tonic::metadata::AsciiMetadataKey;
use tonic::transport::Channel; use tonic::transport::Channel;
use crate::error::{ use crate::error::{
ConvertFlightDataSnafu, Error, FlightGetSnafu, IllegalFlightMessagesSnafu, InvalidAsciiSnafu, ConvertFlightDataSnafu, Error, FlightGetSnafu, IllegalFlightMessagesSnafu, InvalidAsciiSnafu,
InvalidTonicMetadataValueSnafu, ServerSnafu, ServerSnafu,
}; };
use crate::{from_grpc_response, Client, Result}; use crate::{from_grpc_response, Client, Result};
type FlightDataStream = Pin<Box<dyn Stream<Item = FlightData> + Send>>;
type DoPutResponseStream = Pin<Box<dyn Stream<Item = Result<DoPutResponse>>>>;
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub struct Database { pub struct Database {
// The "catalog" and "schema" to be used in processing the requests at the server side. // The "catalog" and "schema" to be used in processing the requests at the server side.
@@ -121,24 +108,16 @@ impl Database {
self.catalog = catalog.into(); self.catalog = catalog.into();
} }
fn catalog_or_default(&self) -> &str { pub fn catalog(&self) -> &String {
if self.catalog.is_empty() { &self.catalog
DEFAULT_CATALOG_NAME
} else {
&self.catalog
}
} }
pub fn set_schema(&mut self, schema: impl Into<String>) { pub fn set_schema(&mut self, schema: impl Into<String>) {
self.schema = schema.into(); self.schema = schema.into();
} }
fn schema_or_default(&self) -> &str { pub fn schema(&self) -> &String {
if self.schema.is_empty() { &self.schema
DEFAULT_SCHEMA_NAME
} else {
&self.schema
}
} }
pub fn set_timezone(&mut self, timezone: impl Into<String>) { pub fn set_timezone(&mut self, timezone: impl Into<String>) {
@@ -185,7 +164,7 @@ impl Database {
from_grpc_response(response) from_grpc_response(response)
} }
pub async fn handle(&self, request: Request) -> Result<u32> { async fn handle(&self, request: Request) -> Result<u32> {
let mut client = make_database_client(&self.client)?.inner; let mut client = make_database_client(&self.client)?.inner;
let request = self.to_rpc_request(request); let request = self.to_rpc_request(request);
let response = client.handle(request).await?.into_inner(); let response = client.handle(request).await?.into_inner();
@@ -331,41 +310,6 @@ impl Database {
} }
} }
} }
/// Ingest a stream of [RecordBatch]es that belong to a table, using Arrow Flight's "`DoPut`"
/// method. The return value is also a stream, producing [DoPutResponse]s.
pub async fn do_put(&self, stream: FlightDataStream) -> Result<DoPutResponseStream> {
let mut request = tonic::Request::new(stream);
if let Some(AuthHeader {
auth_scheme: Some(AuthScheme::Basic(Basic { username, password })),
}) = &self.ctx.auth_header
{
let encoded = BASE64_STANDARD.encode(format!("{username}:{password}"));
let value =
MetadataValue::from_str(&encoded).context(InvalidTonicMetadataValueSnafu)?;
request.metadata_mut().insert("x-greptime-auth", value);
}
let db_to_put = if !self.dbname.is_empty() {
&self.dbname
} else {
&build_db_string(self.catalog_or_default(), self.schema_or_default())
};
request.metadata_mut().insert(
"x-greptime-db-name",
MetadataValue::from_str(db_to_put).context(InvalidTonicMetadataValueSnafu)?,
);
let mut client = self.client.make_flight_client()?;
let response = client.mut_inner().do_put(request).await?;
let response = response
.into_inner()
.map_err(Into::into)
.and_then(|x| future::ready(DoPutResponse::try_from(x).context(ConvertFlightDataSnafu)))
.boxed();
Ok(response)
}
} }
#[derive(Default, Debug, Clone)] #[derive(Default, Debug, Clone)]


@@ -15,11 +15,10 @@
use std::any::Any; use std::any::Any;
use common_error::ext::{BoxedError, ErrorExt}; use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::{convert_tonic_code_to_status_code, StatusCode}; use common_error::status_code::StatusCode;
use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG}; use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG};
use common_macro::stack_trace_debug; use common_macro::stack_trace_debug;
use snafu::{location, Location, Snafu}; use snafu::{location, Location, Snafu};
use tonic::metadata::errors::InvalidMetadataValue;
use tonic::{Code, Status}; use tonic::{Code, Status};
#[derive(Snafu)] #[derive(Snafu)]
@@ -116,14 +115,6 @@ pub enum Error {
#[snafu(implicit)] #[snafu(implicit)]
location: Location, location: Location,
}, },
#[snafu(display("Invalid Tonic metadata value"))]
InvalidTonicMetadataValue {
#[snafu(source)]
error: InvalidMetadataValue,
#[snafu(implicit)]
location: Location,
},
} }
pub type Result<T> = std::result::Result<T, Error>; pub type Result<T> = std::result::Result<T, Error>;
@@ -144,9 +135,7 @@ impl ErrorExt for Error {
| Error::CreateTlsChannel { source, .. } => source.status_code(), | Error::CreateTlsChannel { source, .. } => source.status_code(),
Error::IllegalGrpcClientState { .. } => StatusCode::Unexpected, Error::IllegalGrpcClientState { .. } => StatusCode::Unexpected,
Error::InvalidAscii { .. } | Error::InvalidTonicMetadataValue { .. } => { Error::InvalidAscii { .. } => StatusCode::InvalidArguments,
StatusCode::InvalidArguments
}
} }
} }
@@ -163,15 +152,15 @@ impl From<Status> for Error {
.and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok()) .and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
} }
let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE).and_then(|s| { let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE)
if let Ok(code) = s.parse::<u32>() { .and_then(|s| {
StatusCode::from_u32(code) if let Ok(code) = s.parse::<u32>() {
} else { StatusCode::from_u32(code)
None } else {
} None
}); }
let tonic_code = e.code(); })
let code = code.unwrap_or_else(|| convert_tonic_code_to_status_code(tonic_code)); .unwrap_or(StatusCode::Unknown);
let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG) let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG)
.unwrap_or_else(|| e.message().to_string()); .unwrap_or_else(|| e.message().to_string());
@@ -198,6 +187,9 @@ impl Error {
} | Self::RegionServer { } | Self::RegionServer {
code: Code::Unavailable, code: Code::Unavailable,
.. ..
} | Self::RegionServer {
code: Code::Unknown,
..
} }
) )
} }

View File

@@ -16,7 +16,7 @@
mod client; mod client;
pub mod client_manager; pub mod client_manager;
pub mod database; mod database;
pub mod error; pub mod error;
pub mod flow; pub mod flow;
pub mod load_balance; pub mod load_balance;


@@ -201,11 +201,12 @@ impl RegionRequester {
.await .await
.map_err(|e| { .map_err(|e| {
let code = e.code(); let code = e.code();
let err: error::Error = e.into();
// Uses `Error::RegionServer` instead of `Error::Server` // Uses `Error::RegionServer` instead of `Error::Server`
error::Error::RegionServer { error::Error::RegionServer {
addr, addr,
code, code,
source: BoxedError::new(error::Error::from(e)), source: BoxedError::new(err),
location: location!(), location: location!(),
} }
})? })?


@@ -68,6 +68,7 @@ query.workspace = true
rand.workspace = true rand.workspace = true
regex.workspace = true regex.workspace = true
reqwest.workspace = true reqwest.workspace = true
rustyline = "10.1"
serde.workspace = true serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
servers.workspace = true servers.workspace = true


@@ -17,6 +17,7 @@ use std::any::Any;
use common_error::ext::{BoxedError, ErrorExt}; use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode; use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug; use common_macro::stack_trace_debug;
use rustyline::error::ReadlineError;
use snafu::{Location, Snafu}; use snafu::{Location, Snafu};
#[derive(Snafu)] #[derive(Snafu)]
@@ -180,6 +181,52 @@ pub enum Error {
#[snafu(display("Invalid REPL command: {reason}"))] #[snafu(display("Invalid REPL command: {reason}"))]
InvalidReplCommand { reason: String }, InvalidReplCommand { reason: String },
#[snafu(display("Cannot create REPL"))]
ReplCreation {
#[snafu(source)]
error: ReadlineError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Error reading command"))]
Readline {
#[snafu(source)]
error: ReadlineError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to request database, sql: {sql}"))]
RequestDatabase {
sql: String,
#[snafu(source)]
source: client::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to collect RecordBatches"))]
CollectRecordBatches {
#[snafu(implicit)]
location: Location,
source: common_recordbatch::error::Error,
},
#[snafu(display("Failed to pretty print Recordbatches"))]
PrettyPrintRecordBatches {
#[snafu(implicit)]
location: Location,
source: common_recordbatch::error::Error,
},
#[snafu(display("Failed to start Meta client"))]
StartMetaClient {
#[snafu(implicit)]
location: Location,
source: meta_client::error::Error,
},
#[snafu(display("Failed to parse SQL: {}", sql))] #[snafu(display("Failed to parse SQL: {}", sql))]
ParseSql { ParseSql {
sql: String, sql: String,
@@ -195,6 +242,13 @@ pub enum Error {
source: query::error::Error, source: query::error::Error,
}, },
#[snafu(display("Failed to encode logical plan in substrait"))]
SubstraitEncodeLogicalPlan {
#[snafu(implicit)]
location: Location,
source: substrait::error::Error,
},
#[snafu(display("Failed to load layered config"))] #[snafu(display("Failed to load layered config"))]
LoadLayeredConfig { LoadLayeredConfig {
#[snafu(source(from(common_config::error::Error, Box::new)))] #[snafu(source(from(common_config::error::Error, Box::new)))]
@@ -341,10 +395,17 @@ impl ErrorExt for Error {
| Error::StopProcedureManager { source, .. } => source.status_code(), | Error::StopProcedureManager { source, .. } => source.status_code(),
Error::BuildWalOptionsAllocator { source, .. } Error::BuildWalOptionsAllocator { source, .. }
| Error::StartWalOptionsAllocator { source, .. } => source.status_code(), | Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
Error::HttpQuerySql { .. } => StatusCode::Internal, Error::ReplCreation { .. } | Error::Readline { .. } | Error::HttpQuerySql { .. } => {
StatusCode::Internal
}
Error::RequestDatabase { source, .. } => source.status_code(),
Error::CollectRecordBatches { source, .. }
| Error::PrettyPrintRecordBatches { source, .. } => source.status_code(),
Error::StartMetaClient { source, .. } => source.status_code(),
Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => { Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => {
source.status_code() source.status_code()
} }
Error::SubstraitEncodeLogicalPlan { source, .. } => source.status_code(),
Error::SerdeJson { .. } Error::SerdeJson { .. }
| Error::FileIo { .. } | Error::FileIo { .. }


@@ -32,9 +32,7 @@ use common_meta::key::TableMetadataManager;
use common_telemetry::info; use common_telemetry::info;
use common_telemetry::logging::TracingOptions; use common_telemetry::logging::TracingOptions;
use common_version::{short_version, version}; use common_version::{short_version, version};
use flow::{ use flow::{FlownodeBuilder, FlownodeInstance, FrontendInvoker};
FlownodeBuilder, FlownodeInstance, FlownodeServiceBuilder, FrontendClient, FrontendInvoker,
};
use meta_client::{MetaClientOptions, MetaClientType}; use meta_client::{MetaClientOptions, MetaClientType};
use snafu::{ensure, OptionExt, ResultExt}; use snafu::{ensure, OptionExt, ResultExt};
use tracing_appender::non_blocking::WorkerGuard; use tracing_appender::non_blocking::WorkerGuard;
@@ -315,26 +313,16 @@ impl StartCommand {
); );
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone())); let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
let frontend_client = FrontendClient::from_meta_client(meta_client.clone());
let flownode_builder = FlownodeBuilder::new( let flownode_builder = FlownodeBuilder::new(
opts.clone(), opts,
Plugins::new(), Plugins::new(),
table_metadata_manager, table_metadata_manager,
catalog_manager.clone(), catalog_manager.clone(),
flow_metadata_manager, flow_metadata_manager,
Arc::new(frontend_client),
) )
.with_heartbeat_task(heartbeat_task); .with_heartbeat_task(heartbeat_task);
let mut flownode = flownode_builder.build().await.context(StartFlownodeSnafu)?; let flownode = flownode_builder.build().await.context(StartFlownodeSnafu)?;
let services = FlownodeServiceBuilder::new(&opts)
.with_grpc_server(flownode.flownode_server().clone())
.enable_http_service()
.build()
.await
.context(StartFlownodeSnafu)?;
flownode.setup_services(services);
let flownode = flownode;
// flownode's frontend to datanode need not timeout. // flownode's frontend to datanode need not timeout.
// Some queries are expected to take long time. // Some queries are expected to take long time.
@@ -345,7 +333,7 @@ impl StartCommand {
let client = Arc::new(NodeClients::new(channel_config)); let client = Arc::new(NodeClients::new(channel_config));
let invoker = FrontendInvoker::build_from( let invoker = FrontendInvoker::build_from(
flownode.flow_engine().streaming_engine(), flownode.flow_worker_manager().clone(),
catalog_manager.clone(), catalog_manager.clone(),
cached_meta_backend.clone(), cached_meta_backend.clone(),
layered_cache_registry.clone(), layered_cache_registry.clone(),
@@ -355,9 +343,7 @@ impl StartCommand {
.await .await
.context(StartFlownodeSnafu)?; .context(StartFlownodeSnafu)?;
flownode flownode
.flow_engine() .flow_worker_manager()
.streaming_engine()
// TODO(discord9): refactor and avoid circular reference
.set_frontend_invoker(invoker) .set_frontend_invoker(invoker)
.await; .await;


@@ -132,7 +132,7 @@ impl SubCommand {
} }
#[derive(Debug, Default, Parser)] #[derive(Debug, Default, Parser)]
pub struct StartCommand { struct StartCommand {
/// The address to bind the gRPC server. /// The address to bind the gRPC server.
#[clap(long, alias = "bind-addr")] #[clap(long, alias = "bind-addr")]
rpc_bind_addr: Option<String>, rpc_bind_addr: Option<String>,
@@ -172,7 +172,7 @@ pub struct StartCommand {
} }
impl StartCommand { impl StartCommand {
pub fn load_options(&self, global_options: &GlobalOptions) -> Result<MetasrvOptions> { fn load_options(&self, global_options: &GlobalOptions) -> Result<MetasrvOptions> {
let mut opts = MetasrvOptions::load_layered_options( let mut opts = MetasrvOptions::load_layered_options(
self.config_file.as_deref(), self.config_file.as_deref(),
self.env_prefix.as_ref(), self.env_prefix.as_ref(),
@@ -261,7 +261,7 @@ impl StartCommand {
Ok(()) Ok(())
} }
pub async fn build(&self, opts: MetasrvOptions) -> Result<Instance> { async fn build(&self, opts: MetasrvOptions) -> Result<Instance> {
common_runtime::init_global_runtimes(&opts.runtime); common_runtime::init_global_runtimes(&opts.runtime);
let guard = common_telemetry::init_global_logging( let guard = common_telemetry::init_global_logging(


@@ -55,10 +55,7 @@ use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, Sto
use datanode::datanode::{Datanode, DatanodeBuilder}; use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer; use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig; use file_engine::config::EngineConfig as FileEngineConfig;
use flow::{ use flow::{FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendInvoker};
FlowConfig, FlownodeBuilder, FlownodeInstance, FlownodeOptions, FrontendClient,
FrontendInvoker, GrpcQueryHandlerWithBoxedError, StreamingEngine,
};
use frontend::frontend::{Frontend, FrontendOptions}; use frontend::frontend::{Frontend, FrontendOptions};
use frontend::instance::builder::FrontendBuilder; use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{Instance as FeInstance, StandaloneDatanodeManager}; use frontend::instance::{Instance as FeInstance, StandaloneDatanodeManager};
@@ -77,10 +74,10 @@ use servers::http::HttpOptions;
use servers::tls::{TlsMode, TlsOption}; use servers::tls::{TlsMode, TlsOption};
use servers::Mode; use servers::Mode;
use snafu::ResultExt; use snafu::ResultExt;
use tokio::sync::RwLock; use tokio::sync::{broadcast, RwLock};
use tracing_appender::non_blocking::WorkerGuard; use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{Result, StartFlownodeSnafu}; use crate::error::Result;
use crate::options::{GlobalOptions, GreptimeOptions}; use crate::options::{GlobalOptions, GreptimeOptions};
use crate::{error, log_versions, App}; use crate::{error, log_versions, App};
@@ -247,7 +244,9 @@ impl StandaloneOptions {
pub struct Instance { pub struct Instance {
datanode: Datanode, datanode: Datanode,
frontend: Frontend, frontend: Frontend,
flownode: FlownodeInstance, // TODO(discord9): wrapped it in flownode instance instead
flow_worker_manager: Arc<FlowWorkerManager>,
flow_shutdown: broadcast::Sender<()>,
procedure_manager: ProcedureManagerRef, procedure_manager: ProcedureManagerRef,
wal_options_allocator: WalOptionsAllocatorRef, wal_options_allocator: WalOptionsAllocatorRef,
// Keep the logging guard to prevent the worker from being dropped. // Keep the logging guard to prevent the worker from being dropped.
@@ -289,7 +288,9 @@ impl App for Instance {
.await .await
.context(error::StartFrontendSnafu)?; .context(error::StartFrontendSnafu)?;
self.flownode.start().await.context(StartFlownodeSnafu)?; self.flow_worker_manager
.clone()
.run_background(Some(self.flow_shutdown.subscribe()));
Ok(()) Ok(())
} }
@@ -310,9 +311,14 @@ impl App for Instance {
.await .await
.context(error::ShutdownDatanodeSnafu)?; .context(error::ShutdownDatanodeSnafu)?;
self.flownode self.flow_shutdown
.shutdown() .send(())
.await .map_err(|_e| {
flow::error::InternalSnafu {
reason: "Failed to send shutdown signal to flow worker manager, all receiver end already closed".to_string(),
}
.build()
})
.context(error::ShutdownFlownodeSnafu)?; .context(error::ShutdownFlownodeSnafu)?;
info!("Datanode instance stopped."); info!("Datanode instance stopped.");
@@ -523,36 +529,32 @@ impl StartCommand {
flow: opts.flow.clone(), flow: opts.flow.clone(),
..Default::default() ..Default::default()
}; };
// for standalone not use grpc, but get a handler to frontend grpc client without
// actually make a connection
let (frontend_client, frontend_instance_handler) =
FrontendClient::from_empty_grpc_handler();
let flow_builder = FlownodeBuilder::new( let flow_builder = FlownodeBuilder::new(
flownode_options, flownode_options,
plugins.clone(), plugins.clone(),
table_metadata_manager.clone(), table_metadata_manager.clone(),
catalog_manager.clone(), catalog_manager.clone(),
flow_metadata_manager.clone(), flow_metadata_manager.clone(),
Arc::new(frontend_client.clone()),
); );
let flownode = flow_builder let flownode = Arc::new(
.build() flow_builder
.await .build()
.map_err(BoxedError::new) .await
.context(error::OtherSnafu)?; .map_err(BoxedError::new)
.context(error::OtherSnafu)?,
);
// set the ref to query for the local flow state // set the ref to query for the local flow state
{ {
let flow_streaming_engine = flownode.flow_engine().streaming_engine(); let flow_worker_manager = flownode.flow_worker_manager();
information_extension information_extension
.set_flow_streaming_engine(flow_streaming_engine) .set_flow_worker_manager(flow_worker_manager.clone())
.await; .await;
} }
let node_manager = Arc::new(StandaloneDatanodeManager { let node_manager = Arc::new(StandaloneDatanodeManager {
region_server: datanode.region_server(), region_server: datanode.region_server(),
flow_server: flownode.flow_engine(), flow_server: flownode.flow_worker_manager(),
}); });
let table_id_sequence = Arc::new( let table_id_sequence = Arc::new(
@@ -606,19 +608,10 @@ impl StartCommand {
.context(error::StartFrontendSnafu)?; .context(error::StartFrontendSnafu)?;
let fe_instance = Arc::new(fe_instance); let fe_instance = Arc::new(fe_instance);
// set the frontend client for flownode let flow_worker_manager = flownode.flow_worker_manager();
let grpc_handler = fe_instance.clone() as Arc<dyn GrpcQueryHandlerWithBoxedError>;
let weak_grpc_handler = Arc::downgrade(&grpc_handler);
frontend_instance_handler
.lock()
.unwrap()
.replace(weak_grpc_handler);
// set the frontend invoker for flownode
let flow_streaming_engine = flownode.flow_engine().streaming_engine();
// flow server need to be able to use frontend to write insert requests back // flow server need to be able to use frontend to write insert requests back
let invoker = FrontendInvoker::build_from( let invoker = FrontendInvoker::build_from(
flow_streaming_engine.clone(), flow_worker_manager.clone(),
catalog_manager.clone(), catalog_manager.clone(),
kv_backend.clone(), kv_backend.clone(),
layered_cache_registry.clone(), layered_cache_registry.clone(),
@@ -627,7 +620,9 @@ impl StartCommand {
) )
.await .await
.context(error::StartFlownodeSnafu)?; .context(error::StartFlownodeSnafu)?;
flow_streaming_engine.set_frontend_invoker(invoker).await; flow_worker_manager.set_frontend_invoker(invoker).await;
let (tx, _rx) = broadcast::channel(1);
let export_metrics_task = ExportMetricsTask::try_new(&opts.export_metrics, Some(&plugins)) let export_metrics_task = ExportMetricsTask::try_new(&opts.export_metrics, Some(&plugins))
.context(error::ServersSnafu)?; .context(error::ServersSnafu)?;
@@ -647,7 +642,8 @@ impl StartCommand {
Ok(Instance { Ok(Instance {
datanode, datanode,
frontend, frontend,
flownode, flow_worker_manager,
flow_shutdown: tx,
procedure_manager, procedure_manager,
wal_options_allocator, wal_options_allocator,
_guard: guard, _guard: guard,
@@ -703,7 +699,7 @@ pub struct StandaloneInformationExtension {
region_server: RegionServer, region_server: RegionServer,
procedure_manager: ProcedureManagerRef, procedure_manager: ProcedureManagerRef,
start_time_ms: u64, start_time_ms: u64,
flow_streaming_engine: RwLock<Option<Arc<StreamingEngine>>>, flow_worker_manager: RwLock<Option<Arc<FlowWorkerManager>>>,
} }
impl StandaloneInformationExtension { impl StandaloneInformationExtension {
@@ -712,14 +708,14 @@ impl StandaloneInformationExtension {
region_server, region_server,
procedure_manager, procedure_manager,
start_time_ms: common_time::util::current_time_millis() as u64, start_time_ms: common_time::util::current_time_millis() as u64,
flow_streaming_engine: RwLock::new(None), flow_worker_manager: RwLock::new(None),
} }
} }
/// Set the flow streaming engine for the standalone instance. /// Set the flow worker manager for the standalone instance.
pub async fn set_flow_streaming_engine(&self, flow_streaming_engine: Arc<StreamingEngine>) { pub async fn set_flow_worker_manager(&self, flow_worker_manager: Arc<FlowWorkerManager>) {
let mut guard = self.flow_streaming_engine.write().await; let mut guard = self.flow_worker_manager.write().await;
*guard = Some(flow_streaming_engine); *guard = Some(flow_worker_manager);
} }
} }
@@ -788,8 +784,6 @@ impl InformationExtension for StandaloneInformationExtension {
sst_size: region_stat.sst_size, sst_size: region_stat.sst_size,
index_size: region_stat.index_size, index_size: region_stat.index_size,
region_manifest: region_stat.manifest.into(), region_manifest: region_stat.manifest.into(),
data_topic_latest_entry_id: region_stat.data_topic_latest_entry_id,
metadata_topic_latest_entry_id: region_stat.metadata_topic_latest_entry_id,
} }
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
@@ -798,7 +792,7 @@ impl InformationExtension for StandaloneInformationExtension {
async fn flow_stats(&self) -> std::result::Result<Option<FlowStat>, Self::Error> { async fn flow_stats(&self) -> std::result::Result<Option<FlowStat>, Self::Error> {
Ok(Some( Ok(Some(
self.flow_streaming_engine self.flow_worker_manager
.read() .read()
.await .await
.as_ref() .as_ref()

src/cmd/tests/cli.rs (new file, 148 lines)

@@ -0,0 +1,148 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(target_os = "macos")]
mod tests {
use std::path::PathBuf;
use std::process::{Command, Stdio};
use std::time::Duration;
use common_test_util::temp_dir::create_temp_dir;
use rexpect::session::PtyReplSession;
struct Repl {
repl: PtyReplSession,
}
impl Repl {
fn send_line(&mut self, line: &str) {
let _ = self.repl.send_line(line).unwrap();
// read a line to consume the prompt
let _ = self.read_line();
}
fn read_line(&mut self) -> String {
self.repl.read_line().unwrap()
}
fn read_expect(&mut self, expect: &str) {
assert_eq!(self.read_line(), expect);
}
fn read_contains(&mut self, pat: &str) {
assert!(self.read_line().contains(pat));
}
}
// TODO(LFC): Un-ignore this REPL test.
// Ignore this REPL test because some logical plans like create database are not supported yet in Datanode.
#[ignore]
#[test]
fn test_repl() {
let data_home = create_temp_dir("data");
let wal_dir = create_temp_dir("wal");
let mut bin_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
bin_path.push("../../target/debug");
let bin_path = bin_path.to_str().unwrap();
let mut datanode = Command::new("./greptime")
.current_dir(bin_path)
.args([
"datanode",
"start",
"--rpc-bind-addr=0.0.0.0:4321",
"--node-id=1",
&format!("--data-home={}", data_home.path().display()),
&format!("--wal-dir={}", wal_dir.path().display()),
])
.stdout(Stdio::null())
.spawn()
.unwrap();
// wait for Datanode actually started
std::thread::sleep(Duration::from_secs(3));
let mut repl_cmd = Command::new("./greptime");
let _ = repl_cmd.current_dir(bin_path).args([
"--log-level=off",
"cli",
"attach",
"--grpc-bind-addr=0.0.0.0:4321",
// history commands can sneak into stdout and mess up our tests, so disable it
"--disable-helper",
]);
let pty_session = rexpect::session::spawn_command(repl_cmd, Some(5_000)).unwrap();
let repl = PtyReplSession {
prompt: "> ".to_string(),
pty_session,
quit_command: None,
echo_on: false,
};
let repl = &mut Repl { repl };
repl.read_expect("Ready for commands. (Hint: try 'help')");
test_create_database(repl);
test_use_database(repl);
test_create_table(repl);
test_insert(repl);
test_select(repl);
datanode.kill().unwrap();
let _ = datanode.wait().unwrap();
}
fn test_create_database(repl: &mut Repl) {
repl.send_line("CREATE DATABASE db;");
repl.read_expect("Affected Rows: 1");
repl.read_contains("Cost");
}
fn test_use_database(repl: &mut Repl) {
repl.send_line("USE db");
repl.read_expect("Total Rows: 0");
repl.read_contains("Cost");
repl.read_expect("Using db");
}
fn test_create_table(repl: &mut Repl) {
repl.send_line("CREATE TABLE t(x STRING, ts TIMESTAMP TIME INDEX);");
repl.read_expect("Affected Rows: 0");
repl.read_contains("Cost");
}
fn test_insert(repl: &mut Repl) {
repl.send_line("INSERT INTO t(x, ts) VALUES ('hello', 1676895812239);");
repl.read_expect("Affected Rows: 1");
repl.read_contains("Cost");
}
fn test_select(repl: &mut Repl) {
repl.send_line("SELECT * FROM t;");
repl.read_expect("+-------+-------------------------+");
repl.read_expect("| x | ts |");
repl.read_expect("+-------+-------------------------+");
repl.read_expect("| hello | 2023-02-20T12:23:32.239 |");
repl.read_expect("+-------+-------------------------+");
repl.read_expect("Total Rows: 1");
repl.read_contains("Cost");
}
}


@@ -74,7 +74,6 @@ fn test_load_datanode_example_config() {
RegionEngineConfig::File(FileEngineConfig {}), RegionEngineConfig::File(FileEngineConfig {}),
RegionEngineConfig::Metric(MetricEngineConfig { RegionEngineConfig::Metric(MetricEngineConfig {
experimental_sparse_primary_key_encoding: false, experimental_sparse_primary_key_encoding: false,
flush_metadata_region_interval: Duration::from_secs(30),
}), }),
], ],
logging: LoggingOptions { logging: LoggingOptions {
@@ -217,7 +216,6 @@ fn test_load_standalone_example_config() {
RegionEngineConfig::File(FileEngineConfig {}), RegionEngineConfig::File(FileEngineConfig {}),
RegionEngineConfig::Metric(MetricEngineConfig { RegionEngineConfig::Metric(MetricEngineConfig {
experimental_sparse_primary_key_encoding: false, experimental_sparse_primary_key_encoding: false,
flush_metadata_region_interval: Duration::from_secs(30),
}), }),
], ],
storage: StorageConfig { storage: StorageConfig {


@@ -31,8 +31,7 @@ impl Plugins {
} }
pub fn insert<T: 'static + Send + Sync>(&self, value: T) { pub fn insert<T: 'static + Send + Sync>(&self, value: T) {
let last = self.write().insert(value); let _ = self.write().insert(value);
assert!(last.is_none(), "each type of plugins must be one and only");
} }
pub fn get<T: 'static + Send + Sync + Clone>(&self) -> Option<T> { pub fn get<T: 'static + Send + Sync + Clone>(&self) -> Option<T> {
@@ -138,12 +137,4 @@ mod tests {
assert_eq!(plugins.len(), 2); assert_eq!(plugins.len(), 2);
assert!(!plugins.is_empty()); assert!(!plugins.is_empty());
} }
#[test]
#[should_panic(expected = "each type of plugins must be one and only")]
fn test_plugin_uniqueness() {
let plugins = Plugins::new();
plugins.insert(1i32);
plugins.insert(2i32);
}
} }
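
One side of this hunk keeps the `assert!` that enforces a single plugin instance per type, the other drops it together with `test_plugin_uniqueness`. A small sketch of the behavior without the assertion, mirroring the deleted test:

    // Without the uniqueness assertion a second insert of the same type no
    // longer panics; the later value silently replaces the earlier one.
    let plugins = Plugins::new();
    plugins.insert(1i32);
    plugins.insert(2i32);
    assert_eq!(plugins.get::<i32>(), Some(2));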


@@ -31,8 +31,7 @@ derive_builder.workspace = true
futures.workspace = true futures.workspace = true
lazy_static.workspace = true lazy_static.workspace = true
object-store.workspace = true object-store.workspace = true
object_store_opendal.workspace = true orc-rust = { version = "0.5", default-features = false, features = [
orc-rust = { git = "https://github.com/datafusion-contrib/orc-rust", rev = "3134cab581a8e91b942d6a23aca2916ea965f6bb", default-features = false, features = [
"async", "async",
] } ] }
parquet.workspace = true parquet.workspace = true


@@ -19,7 +19,6 @@ use std::str::FromStr;
use async_compression::tokio::bufread::{BzDecoder, GzipDecoder, XzDecoder, ZstdDecoder}; use async_compression::tokio::bufread::{BzDecoder, GzipDecoder, XzDecoder, ZstdDecoder};
use async_compression::tokio::write; use async_compression::tokio::write;
use bytes::Bytes; use bytes::Bytes;
use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
use futures::Stream; use futures::Stream;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use strum::EnumIter; use strum::EnumIter;
@@ -193,15 +192,3 @@ macro_rules! impl_compression_type {
} }
impl_compression_type!((Gzip, Gzip), (Bzip2, Bz), (Xz, Xz), (Zstd, Zstd)); impl_compression_type!((Gzip, Gzip), (Bzip2, Bz), (Xz, Xz), (Zstd, Zstd));
impl From<CompressionType> for FileCompressionType {
fn from(t: CompressionType) -> Self {
match t {
CompressionType::Gzip => FileCompressionType::GZIP,
CompressionType::Bzip2 => FileCompressionType::BZIP2,
CompressionType::Xz => FileCompressionType::XZ,
CompressionType::Zstd => FileCompressionType::ZSTD,
CompressionType::Uncompressed => FileCompressionType::UNCOMPRESSED,
}
}
}


@@ -14,23 +14,28 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc;
use arrow::csv; use arrow::csv;
use arrow::csv::reader::Format; use arrow::csv::reader::Format;
use arrow::record_batch::RecordBatch; use arrow::record_batch::RecordBatch;
use arrow_schema::Schema; use arrow_schema::{Schema, SchemaRef};
use async_trait::async_trait; use async_trait::async_trait;
use common_runtime; use common_runtime;
use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener};
use datafusion::error::Result as DataFusionResult;
use datafusion::physical_plan::SendableRecordBatchStream; use datafusion::physical_plan::SendableRecordBatchStream;
use derive_builder::Builder;
use object_store::ObjectStore; use object_store::ObjectStore;
use snafu::ResultExt; use snafu::ResultExt;
use tokio_util::compat::FuturesAsyncReadCompatExt; use tokio_util::compat::FuturesAsyncReadCompatExt;
use tokio_util::io::SyncIoBridge; use tokio_util::io::SyncIoBridge;
use super::stream_to_file;
use crate::buffered_writer::DfRecordBatchEncoder; use crate::buffered_writer::DfRecordBatchEncoder;
use crate::compression::CompressionType; use crate::compression::CompressionType;
use crate::error::{self, Result}; use crate::error::{self, Result};
use crate::file_format::{self, stream_to_file, FileFormat}; use crate::file_format::{self, open_with_decoder, FileFormat};
use crate::share_buffer::SharedBuffer; use crate::share_buffer::SharedBuffer;
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -95,6 +100,66 @@ impl Default for CsvFormat {
} }
} }
#[derive(Debug, Clone, Builder)]
pub struct CsvConfig {
batch_size: usize,
file_schema: SchemaRef,
#[builder(default = "None")]
file_projection: Option<Vec<usize>>,
#[builder(default = "true")]
has_header: bool,
#[builder(default = "b','")]
delimiter: u8,
}
impl CsvConfig {
fn builder(&self) -> csv::ReaderBuilder {
let mut builder = csv::ReaderBuilder::new(self.file_schema.clone())
.with_delimiter(self.delimiter)
.with_batch_size(self.batch_size)
.with_header(self.has_header);
if let Some(proj) = &self.file_projection {
builder = builder.with_projection(proj.clone());
}
builder
}
}
#[derive(Debug, Clone)]
pub struct CsvOpener {
config: Arc<CsvConfig>,
object_store: Arc<ObjectStore>,
compression_type: CompressionType,
}
impl CsvOpener {
/// Return a new [`CsvOpener`]. The caller must ensure that [`CsvConfig`].file_schema corresponds to the file being opened.
pub fn new(
config: CsvConfig,
object_store: ObjectStore,
compression_type: CompressionType,
) -> Self {
CsvOpener {
config: Arc::new(config),
object_store: Arc::new(object_store),
compression_type,
}
}
}
impl FileOpener for CsvOpener {
fn open(&self, meta: FileMeta) -> DataFusionResult<FileOpenFuture> {
open_with_decoder(
self.object_store.clone(),
meta.location().to_string(),
self.compression_type,
|| Ok(self.config.builder().build_decoder()),
)
}
}
#[async_trait] #[async_trait]
impl FileFormat for CsvFormat { impl FileFormat for CsvFormat {
async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> { async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> {
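
A brief usage sketch of the opener added above. Assumptions: `CsvConfigBuilder` is the builder that `derive_builder` generates for `CsvConfig` (it is imported by the tests later in this diff); `schema`, `store` and `scan_cfg` stand for a `SchemaRef`, an `ObjectStore` and a `FileScanConfig` supplied by the caller; and `FileStream::new` follows the signature of the DataFusion version pinned in this workspace:

    use datafusion::datasource::physical_plan::FileStream;
    use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
    use futures::TryStreamExt;

    // Build the reader configuration; unset fields fall back to the
    // `#[builder(default = ...)]` values declared on `CsvConfig`.
    let config = CsvConfigBuilder::default()
        .batch_size(8192)
        .file_schema(schema.clone())
        .build()
        .expect("required fields are set");
    let opener = CsvOpener::new(config, store, CompressionType::Uncompressed);

    // Drive the opener through DataFusion's FileStream to collect batches.
    let stream = FileStream::new(&scan_cfg, 0, opener, &ExecutionPlanMetricsSet::new())?;
    let batches: Vec<_> = stream.try_collect().await?;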


@@ -15,24 +15,29 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::io::BufReader; use std::io::BufReader;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc;
use arrow::json; use arrow::datatypes::SchemaRef;
use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter}; use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter};
use arrow::json::writer::LineDelimited; use arrow::json::writer::LineDelimited;
use arrow::json::{self, ReaderBuilder};
use arrow::record_batch::RecordBatch; use arrow::record_batch::RecordBatch;
use arrow_schema::Schema; use arrow_schema::Schema;
use async_trait::async_trait; use async_trait::async_trait;
use common_runtime; use common_runtime;
use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener};
use datafusion::error::{DataFusionError, Result as DataFusionResult};
use datafusion::physical_plan::SendableRecordBatchStream; use datafusion::physical_plan::SendableRecordBatchStream;
use object_store::ObjectStore; use object_store::ObjectStore;
use snafu::ResultExt; use snafu::ResultExt;
use tokio_util::compat::FuturesAsyncReadCompatExt; use tokio_util::compat::FuturesAsyncReadCompatExt;
use tokio_util::io::SyncIoBridge; use tokio_util::io::SyncIoBridge;
use super::stream_to_file;
use crate::buffered_writer::DfRecordBatchEncoder; use crate::buffered_writer::DfRecordBatchEncoder;
use crate::compression::CompressionType; use crate::compression::CompressionType;
use crate::error::{self, Result}; use crate::error::{self, Result};
use crate::file_format::{self, stream_to_file, FileFormat}; use crate::file_format::{self, open_with_decoder, FileFormat};
use crate::share_buffer::SharedBuffer; use crate::share_buffer::SharedBuffer;
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -109,6 +114,47 @@ impl FileFormat for JsonFormat {
} }
} }
#[derive(Debug, Clone)]
pub struct JsonOpener {
batch_size: usize,
projected_schema: SchemaRef,
object_store: Arc<ObjectStore>,
compression_type: CompressionType,
}
impl JsonOpener {
/// Return a new [`JsonOpener`]. Any fields not present in `projected_schema` will be ignored.
pub fn new(
batch_size: usize,
projected_schema: SchemaRef,
object_store: ObjectStore,
compression_type: CompressionType,
) -> Self {
Self {
batch_size,
projected_schema,
object_store: Arc::new(object_store),
compression_type,
}
}
}
impl FileOpener for JsonOpener {
fn open(&self, meta: FileMeta) -> DataFusionResult<FileOpenFuture> {
open_with_decoder(
self.object_store.clone(),
meta.location().to_string(),
self.compression_type,
|| {
ReaderBuilder::new(self.projected_schema.clone())
.with_batch_size(self.batch_size)
.build_decoder()
.map_err(DataFusionError::from)
},
)
}
}
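
Analogously, a brief sketch of constructing the `JsonOpener` defined above. The constructor arguments follow the definition in this hunk; the batch size and helper name are invented for illustration.

use arrow::datatypes::SchemaRef;
use object_store::ObjectStore;

use crate::compression::CompressionType;
use crate::file_format::json::JsonOpener;

// Illustrative helper: rows are decoded against `projected_schema`, and fields
// not present in that schema are ignored, as the doc comment above notes.
fn build_json_opener(projected_schema: SchemaRef, store: ObjectStore) -> JsonOpener {
    JsonOpener::new(8192, projected_schema, store, CompressionType::Uncompressed)
}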
pub async fn stream_to_json( pub async fn stream_to_json(
stream: SendableRecordBatchStream, stream: SendableRecordBatchStream,
store: ObjectStore, store: ObjectStore,


@@ -19,10 +19,7 @@ use std::vec;
use common_test_util::find_workspace_path; use common_test_util::find_workspace_path;
use datafusion::assert_batches_eq; use datafusion::assert_batches_eq;
use datafusion::datasource::file_format::file_compression_type::FileCompressionType; use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStream, ParquetExec};
use datafusion::datasource::physical_plan::{
CsvConfig, CsvOpener, FileOpener, FileScanConfig, FileStream, JsonOpener, ParquetExec,
};
use datafusion::execution::context::TaskContext; use datafusion::execution::context::TaskContext;
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::ExecutionPlan;
@@ -30,11 +27,14 @@ use datafusion::prelude::SessionContext;
use futures::StreamExt; use futures::StreamExt;
use super::FORMAT_TYPE; use super::FORMAT_TYPE;
use crate::compression::CompressionType;
use crate::error;
use crate::file_format::csv::{CsvConfigBuilder, CsvOpener};
use crate::file_format::json::JsonOpener;
use crate::file_format::orc::{OrcFormat, OrcOpener}; use crate::file_format::orc::{OrcFormat, OrcOpener};
use crate::file_format::parquet::DefaultParquetFileReaderFactory; use crate::file_format::parquet::DefaultParquetFileReaderFactory;
use crate::file_format::{FileFormat, Format}; use crate::file_format::{FileFormat, Format};
use crate::test_util::{scan_config, test_basic_schema, test_store}; use crate::test_util::{self, scan_config, test_basic_schema, test_store};
use crate::{error, test_util};
struct Test<'a, T: FileOpener> { struct Test<'a, T: FileOpener> {
config: FileScanConfig, config: FileScanConfig,
@@ -62,18 +62,15 @@ impl<T: FileOpener> Test<'_, T> {
#[tokio::test] #[tokio::test]
async fn test_json_opener() { async fn test_json_opener() {
let store = test_store("/"); let store = test_store("/");
let store = Arc::new(object_store_opendal::OpendalStore::new(store));
let schema = test_basic_schema(); let schema = test_basic_schema();
let json_opener = || { let json_opener = JsonOpener::new(
JsonOpener::new( 100,
test_util::TEST_BATCH_SIZE, schema.clone(),
schema.clone(), store.clone(),
FileCompressionType::UNCOMPRESSED, CompressionType::Uncompressed,
store.clone(), );
)
};
let path = &find_workspace_path("/src/common/datasource/tests/json/basic.json") let path = &find_workspace_path("/src/common/datasource/tests/json/basic.json")
.display() .display()
@@ -81,7 +78,7 @@ async fn test_json_opener() {
let tests = [ let tests = [
Test { Test {
config: scan_config(schema.clone(), None, path), config: scan_config(schema.clone(), None, path),
opener: json_opener(), opener: json_opener.clone(),
expected: vec![ expected: vec![
"+-----+-------+", "+-----+-------+",
"| num | str |", "| num | str |",
@@ -94,7 +91,7 @@ async fn test_json_opener() {
}, },
Test { Test {
config: scan_config(schema.clone(), Some(1), path), config: scan_config(schema.clone(), Some(1), path),
opener: json_opener(), opener: json_opener.clone(),
expected: vec![ expected: vec![
"+-----+------+", "+-----+------+",
"| num | str |", "| num | str |",
@@ -113,30 +110,23 @@ async fn test_json_opener() {
#[tokio::test] #[tokio::test]
async fn test_csv_opener() { async fn test_csv_opener() {
let store = test_store("/"); let store = test_store("/");
let store = Arc::new(object_store_opendal::OpendalStore::new(store));
let schema = test_basic_schema(); let schema = test_basic_schema();
let path = &find_workspace_path("/src/common/datasource/tests/csv/basic.csv") let path = &find_workspace_path("/src/common/datasource/tests/csv/basic.csv")
.display() .display()
.to_string(); .to_string();
let csv_config = Arc::new(CsvConfig::new( let csv_conf = CsvConfigBuilder::default()
test_util::TEST_BATCH_SIZE, .batch_size(test_util::TEST_BATCH_SIZE)
schema.clone(), .file_schema(schema.clone())
None, .build()
true, .unwrap();
b',',
b'"',
None,
store,
None,
));
let csv_opener = || CsvOpener::new(csv_config.clone(), FileCompressionType::UNCOMPRESSED); let csv_opener = CsvOpener::new(csv_conf, store, CompressionType::Uncompressed);
let tests = [ let tests = [
Test { Test {
config: scan_config(schema.clone(), None, path), config: scan_config(schema.clone(), None, path),
opener: csv_opener(), opener: csv_opener.clone(),
expected: vec![ expected: vec![
"+-----+-------+", "+-----+-------+",
"| num | str |", "| num | str |",
@@ -149,7 +139,7 @@ async fn test_csv_opener() {
}, },
Test { Test {
config: scan_config(schema.clone(), Some(1), path), config: scan_config(schema.clone(), Some(1), path),
opener: csv_opener(), opener: csv_opener.clone(),
expected: vec![ expected: vec![
"+-----+------+", "+-----+------+",
"| num | str |", "| num | str |",


@@ -16,19 +16,17 @@ use std::sync::Arc;
use arrow_schema::{DataType, Field, Schema, SchemaRef}; use arrow_schema::{DataType, Field, Schema, SchemaRef};
use common_test_util::temp_dir::{create_temp_dir, TempDir}; use common_test_util::temp_dir::{create_temp_dir, TempDir};
use datafusion::common::{Constraints, Statistics}; use datafusion::common::Statistics;
use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::listing::PartitionedFile;
use datafusion::datasource::object_store::ObjectStoreUrl; use datafusion::datasource::object_store::ObjectStoreUrl;
use datafusion::datasource::physical_plan::{ use datafusion::datasource::physical_plan::{FileScanConfig, FileStream};
CsvConfig, CsvOpener, FileScanConfig, FileStream, JsonOpener,
};
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
use object_store::services::Fs; use object_store::services::Fs;
use object_store::ObjectStore; use object_store::ObjectStore;
use crate::file_format::csv::stream_to_csv; use crate::compression::CompressionType;
use crate::file_format::json::stream_to_json; use crate::file_format::csv::{stream_to_csv, CsvConfigBuilder, CsvOpener};
use crate::file_format::json::{stream_to_json, JsonOpener};
use crate::test_util; use crate::test_util;
pub const TEST_BATCH_SIZE: usize = 100; pub const TEST_BATCH_SIZE: usize = 100;
@@ -76,7 +74,6 @@ pub fn scan_config(file_schema: SchemaRef, limit: Option<usize>, filename: &str)
object_store_url: ObjectStoreUrl::parse("empty://").unwrap(), // won't be used object_store_url: ObjectStoreUrl::parse("empty://").unwrap(), // won't be used
file_schema, file_schema,
file_groups: vec![vec![PartitionedFile::new(filename.to_string(), 10)]], file_groups: vec![vec![PartitionedFile::new(filename.to_string(), 10)]],
constraints: Constraints::empty(),
statistics, statistics,
projection: None, projection: None,
limit, limit,
@@ -93,8 +90,8 @@ pub async fn setup_stream_to_json_test(origin_path: &str, threshold: impl Fn(usi
let json_opener = JsonOpener::new( let json_opener = JsonOpener::new(
test_util::TEST_BATCH_SIZE, test_util::TEST_BATCH_SIZE,
schema.clone(), schema.clone(),
FileCompressionType::UNCOMPRESSED, store.clone(),
Arc::new(object_store_opendal::OpendalStore::new(store.clone())), CompressionType::Uncompressed,
); );
let size = store.read(origin_path).await.unwrap().len(); let size = store.read(origin_path).await.unwrap().len();
@@ -127,19 +124,13 @@ pub async fn setup_stream_to_csv_test(origin_path: &str, threshold: impl Fn(usiz
let schema = test_basic_schema(); let schema = test_basic_schema();
let csv_config = Arc::new(CsvConfig::new( let csv_conf = CsvConfigBuilder::default()
TEST_BATCH_SIZE, .batch_size(test_util::TEST_BATCH_SIZE)
schema.clone(), .file_schema(schema.clone())
None, .build()
true, .unwrap();
b',',
b'"',
None,
Arc::new(object_store_opendal::OpendalStore::new(store.clone())),
None,
));
let csv_opener = CsvOpener::new(csv_config, FileCompressionType::UNCOMPRESSED); let csv_opener = CsvOpener::new(csv_conf, store.clone(), CompressionType::Uncompressed);
let size = store.read(origin_path).await.unwrap().len(); let size = store.read(origin_path).await.unwrap().len();


@@ -12,6 +12,3 @@ http.workspace = true
snafu.workspace = true snafu.workspace = true
strum.workspace = true strum.workspace = true
tonic.workspace = true tonic.workspace = true
[dev-dependencies]
common-macro.workspace = true


@@ -42,7 +42,7 @@ pub trait ErrorExt: StackError {
if let Some(external_error) = error.source() { if let Some(external_error) = error.source() {
let external_root = external_error.sources().last().unwrap(); let external_root = external_error.sources().last().unwrap();
if error.transparent() { if error.to_string().is_empty() {
format!("{external_root}") format!("{external_root}")
} else { } else {
format!("{error}: {external_root}") format!("{error}: {external_root}")
@@ -86,14 +86,6 @@ pub trait StackError: std::error::Error {
} }
result result
} }
/// Indicates whether this error is "transparent", i.e., it delegates its "display" and "source"
/// to the underlying error. This can be useful when you are just wrapping some external error
/// **AND** cannot or will not provide meaningful contextual info. For example, the
/// `DataFusionError`.
fn transparent(&self) -> bool {
false
}
} }
impl<T: ?Sized + StackError> StackError for Arc<T> { impl<T: ?Sized + StackError> StackError for Arc<T> {


@@ -34,14 +34,12 @@ pub enum StatusCode {
Internal = 1003, Internal = 1003,
/// Invalid arguments. /// Invalid arguments.
InvalidArguments = 1004, InvalidArguments = 1004,
/// The task is cancelled (typically caller-side). /// The task is cancelled.
Cancelled = 1005, Cancelled = 1005,
/// Illegal state, can be exposed to users. /// Illegal state, can be exposed to users.
IllegalState = 1006, IllegalState = 1006,
/// Caused by some error originating from an external system. /// Caused by some error originating from an external system.
External = 1007, External = 1007,
/// The request is deadline exceeded (typically server-side).
DeadlineExceeded = 1008,
// ====== End of common status code ================ // ====== End of common status code ================
// ====== Begin of SQL related status code ========= // ====== Begin of SQL related status code =========
@@ -144,7 +142,6 @@ impl StatusCode {
| StatusCode::Unexpected | StatusCode::Unexpected
| StatusCode::InvalidArguments | StatusCode::InvalidArguments
| StatusCode::Cancelled | StatusCode::Cancelled
| StatusCode::DeadlineExceeded
| StatusCode::InvalidSyntax | StatusCode::InvalidSyntax
| StatusCode::DatabaseAlreadyExists | StatusCode::DatabaseAlreadyExists
| StatusCode::PlanQuery | StatusCode::PlanQuery
@@ -180,7 +177,6 @@ impl StatusCode {
| StatusCode::Unexpected | StatusCode::Unexpected
| StatusCode::Internal | StatusCode::Internal
| StatusCode::Cancelled | StatusCode::Cancelled
| StatusCode::DeadlineExceeded
| StatusCode::IllegalState | StatusCode::IllegalState
| StatusCode::EngineExecuteQuery | StatusCode::EngineExecuteQuery
| StatusCode::StorageUnavailable | StatusCode::StorageUnavailable
@@ -276,7 +272,6 @@ pub fn status_to_tonic_code(status_code: StatusCode) -> Code {
Code::InvalidArgument Code::InvalidArgument
} }
StatusCode::Cancelled => Code::Cancelled, StatusCode::Cancelled => Code::Cancelled,
StatusCode::DeadlineExceeded => Code::DeadlineExceeded,
StatusCode::TableAlreadyExists StatusCode::TableAlreadyExists
| StatusCode::TableColumnExists | StatusCode::TableColumnExists
| StatusCode::RegionAlreadyExists | StatusCode::RegionAlreadyExists
@@ -304,15 +299,6 @@ pub fn status_to_tonic_code(status_code: StatusCode) -> Code {
} }
} }
/// Converts tonic [Code] to [StatusCode].
pub fn convert_tonic_code_to_status_code(code: Code) -> StatusCode {
match code {
Code::Cancelled => StatusCode::Cancelled,
Code::DeadlineExceeded => StatusCode::DeadlineExceeded,
_ => StatusCode::Internal,
}
}
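
To make the mapping concrete, a hedged sketch of the intended round-trip behaviour, assuming the `DeadlineExceeded` variant and `convert_tonic_code_to_status_code` shown on one side of this hunk; the assertions simply mirror the match arms above and are not taken from the real test module.

use tonic::Code;

// Illustrative expectations; assumes this lives in the same module as the
// conversion functions above.
fn demo_status_code_mapping() {
    assert_eq!(status_to_tonic_code(StatusCode::Cancelled), Code::Cancelled);
    assert_eq!(
        status_to_tonic_code(StatusCode::DeadlineExceeded),
        Code::DeadlineExceeded
    );
    // Tonic codes without a dedicated mapping fall back to StatusCode::Internal.
    assert_eq!(
        convert_tonic_code_to_status_code(Code::Unknown),
        StatusCode::Internal
    );
}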
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use strum::IntoEnumIterator; use strum::IntoEnumIterator;


@@ -1,115 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use common_error::ext::{ErrorExt, PlainError, StackError};
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use snafu::{Location, ResultExt, Snafu};
#[derive(Snafu)]
#[stack_trace_debug]
enum MyError {
#[snafu(display(r#"A normal error with "display" attribute, message "{}""#, message))]
Normal {
message: String,
#[snafu(source)]
error: PlainError,
#[snafu(implicit)]
location: Location,
},
#[snafu(transparent)]
Transparent {
#[snafu(source)]
error: PlainError,
#[snafu(implicit)]
location: Location,
},
}
impl ErrorExt for MyError {
fn status_code(&self) -> StatusCode {
StatusCode::Unexpected
}
fn as_any(&self) -> &dyn Any {
self
}
}
fn normal_error() -> Result<(), MyError> {
let plain_error = PlainError::new("<root cause>".to_string(), StatusCode::Unexpected);
Err(plain_error).context(NormalSnafu { message: "blabla" })
}
fn transparent_error() -> Result<(), MyError> {
let plain_error = PlainError::new("<root cause>".to_string(), StatusCode::Unexpected);
Err(plain_error)?
}
#[test]
fn test_output_msg() {
let result = normal_error();
assert_eq!(
result.unwrap_err().output_msg(),
r#"A normal error with "display" attribute, message "blabla": <root cause>"#
);
let result = transparent_error();
assert_eq!(result.unwrap_err().output_msg(), "<root cause>");
}
#[test]
fn test_to_string() {
let result = normal_error();
assert_eq!(
result.unwrap_err().to_string(),
r#"A normal error with "display" attribute, message "blabla""#
);
let result = transparent_error();
assert_eq!(result.unwrap_err().to_string(), "<root cause>");
}
#[test]
fn test_debug_format() {
let result = normal_error();
let debug_output = format!("{:?}", result.unwrap_err());
let normalized_output = debug_output.replace('\\', "/");
assert_eq!(
normalized_output,
r#"0: A normal error with "display" attribute, message "blabla", at src/common/error/tests/ext.rs:55:22
1: PlainError { msg: "<root cause>", status_code: Unexpected }"#
);
let result = transparent_error();
let debug_output = format!("{:?}", result.unwrap_err());
let normalized_output = debug_output.replace('\\', "/");
assert_eq!(
normalized_output,
r#"0: <transparent>, at src/common/error/tests/ext.rs:60:5
1: PlainError { msg: "<root cause>", status_code: Unexpected }"#
);
}
#[test]
fn test_transparent_flag() {
let result = normal_error();
assert!(!result.unwrap_err().transparent());
let result = transparent_error();
assert!(result.unwrap_err().transparent());
}


@@ -8,7 +8,6 @@ license.workspace = true
workspace = true workspace = true
[features] [features]
testing = []
default = ["geo"] default = ["geo"]
geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"] geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"]
@@ -18,7 +17,6 @@ api.workspace = true
arc-swap = "1.0" arc-swap = "1.0"
async-trait.workspace = true async-trait.workspace = true
bincode = "1.3" bincode = "1.3"
catalog.workspace = true
chrono.workspace = true chrono.workspace = true
common-base.workspace = true common-base.workspace = true
common-catalog.workspace = true common-catalog.workspace = true


@@ -25,13 +25,12 @@ use session::context::QueryContextRef;
use crate::handlers::ProcedureServiceHandlerRef; use crate::handlers::ProcedureServiceHandlerRef;
use crate::helper::cast_u64; use crate::helper::cast_u64;
/// The default timeout for the migrate region procedure. const DEFAULT_TIMEOUT_SECS: u64 = 30;
const DEFAULT_TIMEOUT_SECS: u64 = 300;
/// A function to migrate a region from source peer to target peer. /// A function to migrate a region from source peer to target peer.
/// Returns the submitted procedure id if success. Only available in cluster mode. /// Returns the submitted procedure id if success. Only available in cluster mode.
/// ///
/// - `migrate_region(region_id, from_peer, to_peer)`, with timeout(300 seconds). /// - `migrate_region(region_id, from_peer, to_peer)`, with timeout(30 seconds).
/// - `migrate_region(region_id, from_peer, to_peer, timeout(secs))`. /// - `migrate_region(region_id, from_peer, to_peer, timeout(secs))`.
/// ///
/// The parameters: /// The parameters:


@@ -32,7 +32,7 @@ pub struct FunctionContext {
impl FunctionContext { impl FunctionContext {
/// Create a mock [`FunctionContext`] for test. /// Create a mock [`FunctionContext`] for test.
#[cfg(any(test, feature = "testing"))] #[cfg(test)]
pub fn mock() -> Self { pub fn mock() -> Self {
Self { Self {
query_ctx: QueryContextBuilder::default().build().into(), query_ctx: QueryContextBuilder::default().build().into(),


@@ -15,7 +15,6 @@
use std::sync::Arc; use std::sync::Arc;
use async_trait::async_trait; use async_trait::async_trait;
use catalog::CatalogManagerRef;
use common_base::AffectedRows; use common_base::AffectedRows;
use common_meta::rpc::procedure::{ use common_meta::rpc::procedure::{
AddRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse, AddRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse,
@@ -73,9 +72,6 @@ pub trait ProcedureServiceHandler: Send + Sync {
/// Remove a region follower from a region. /// Remove a region follower from a region.
async fn remove_region_follower(&self, request: RemoveRegionFollowerRequest) -> Result<()>; async fn remove_region_follower(&self, request: RemoveRegionFollowerRequest) -> Result<()>;
/// Get the catalog manager
fn catalog_manager(&self) -> &CatalogManagerRef;
} }
/// This flow service handler is only use for flush flow for now. /// This flow service handler is only use for flush flow for now.


@@ -27,7 +27,7 @@ use datatypes::value::{ListValue, Value};
use datatypes::vectors::VectorRef; use datatypes::vectors::VectorRef;
use snafu::{ensure, ResultExt}; use snafu::{ensure, ResultExt};
use crate::scalars::geo::helpers::{ensure_columns_len, ensure_columns_n}; use super::helpers::{ensure_columns_len, ensure_columns_n};
/// Accumulator of lat, lng, timestamp tuples /// Accumulator of lat, lng, timestamp tuples
#[derive(Debug)] #[derive(Debug)]


@@ -31,8 +31,8 @@ use h3o::{CellIndex, LatLng, Resolution};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use snafu::ResultExt; use snafu::ResultExt;
use super::helpers::{ensure_and_coerce, ensure_columns_len, ensure_columns_n};
use crate::function::{Function, FunctionContext}; use crate::function::{Function, FunctionContext};
use crate::scalars::geo::helpers::{ensure_and_coerce, ensure_columns_len, ensure_columns_n};
static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| { static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
vec![ vec![


@@ -26,9 +26,9 @@ use geo::{Area, Distance, Haversine};
use geo_types::Geometry; use geo_types::Geometry;
use snafu::ResultExt; use snafu::ResultExt;
use super::helpers::{ensure_columns_len, ensure_columns_n};
use super::wkt::parse_wkt;
use crate::function::{Function, FunctionContext}; use crate::function::{Function, FunctionContext};
use crate::scalars::geo::helpers::{ensure_columns_len, ensure_columns_n};
use crate::scalars::geo::wkt::parse_wkt;
/// Return the WGS84 (SRID: 4326) euclidean distance between two geometry objects, in degrees /// Return the WGS84 (SRID: 4326) euclidean distance between two geometry objects, in degrees
#[derive(Clone, Debug, Default, Display)] #[derive(Clone, Debug, Default, Display)]


@@ -23,9 +23,9 @@ use geo::algorithm::contains::Contains;
use geo::algorithm::intersects::Intersects; use geo::algorithm::intersects::Intersects;
use geo::algorithm::within::Within; use geo::algorithm::within::Within;
use super::helpers::{ensure_columns_len, ensure_columns_n};
use super::wkt::parse_wkt;
use crate::function::{Function, FunctionContext}; use crate::function::{Function, FunctionContext};
use crate::scalars::geo::helpers::{ensure_columns_len, ensure_columns_n};
use crate::scalars::geo::wkt::parse_wkt;
/// Test if spatial relationship: contains /// Test if spatial relationship: contains
#[derive(Clone, Debug, Default, Display)] #[derive(Clone, Debug, Default, Display)]


@@ -26,8 +26,8 @@ use once_cell::sync::Lazy;
use snafu::ResultExt; use snafu::ResultExt;
use wkt::{ToWkt, TryFromWkt}; use wkt::{ToWkt, TryFromWkt};
use super::helpers::{ensure_columns_len, ensure_columns_n};
use crate::function::{Function, FunctionContext}; use crate::function::{Function, FunctionContext};
use crate::scalars::geo::helpers::{ensure_columns_len, ensure_columns_n};
static COORDINATE_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| { static COORDINATE_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
vec![ vec![


@@ -115,13 +115,6 @@ impl Function for UddSketchCalcFunction {
} }
}; };
// Check if the sketch is empty; if so, push null.
// This avoids a panic when calling estimate_quantile on an empty sketch,
// which in practice happens when the input is all null.
if sketch.bucket_iter().count() == 0 {
builder.push_null();
continue;
}
// Compute the estimated quantile from the sketch // Compute the estimated quantile from the sketch
let result = sketch.estimate_quantile(perc); let result = sketch.estimate_quantile(perc);
builder.push(Some(result)); builder.push(Some(result));


@@ -163,7 +163,7 @@ mod tests {
]; ];
let args = ScalarFunctionArgs { let args = ScalarFunctionArgs {
args, args: &args,
number_rows: 4, number_rows: 4,
return_type: &ConcreteDataType::boolean_datatype().as_arrow_type(), return_type: &ConcreteDataType::boolean_datatype().as_arrow_type(),
}; };


@@ -28,13 +28,12 @@ pub struct FunctionState {
impl FunctionState { impl FunctionState {
/// Create a mock [`FunctionState`] for test. /// Create a mock [`FunctionState`] for test.
#[cfg(any(test, feature = "testing"))] #[cfg(test)]
pub fn mock() -> Self { pub fn mock() -> Self {
use std::sync::Arc; use std::sync::Arc;
use api::v1::meta::ProcedureStatus; use api::v1::meta::ProcedureStatus;
use async_trait::async_trait; use async_trait::async_trait;
use catalog::CatalogManagerRef;
use common_base::AffectedRows; use common_base::AffectedRows;
use common_meta::rpc::procedure::{ use common_meta::rpc::procedure::{
AddRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse, AddRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse,
@@ -81,10 +80,6 @@ impl FunctionState {
) -> Result<()> { ) -> Result<()> {
Ok(()) Ok(())
} }
fn catalog_manager(&self) -> &CatalogManagerRef {
unimplemented!()
}
} }
#[async_trait] #[async_trait]


@@ -23,11 +23,8 @@ flatbuffers = "24"
hyper.workspace = true hyper.workspace = true
lazy_static.workspace = true lazy_static.workspace = true
prost.workspace = true prost.workspace = true
serde.workspace = true
serde_json.workspace = true
snafu.workspace = true snafu.workspace = true
tokio.workspace = true tokio.workspace = true
tokio-util.workspace = true
tonic.workspace = true tonic.workspace = true
tower.workspace = true tower.workspace = true


@@ -22,7 +22,6 @@ use dashmap::mapref::entry::Entry;
use dashmap::DashMap; use dashmap::DashMap;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use tokio_util::sync::CancellationToken;
use tonic::transport::{ use tonic::transport::{
Certificate, Channel as InnerChannel, ClientTlsConfig, Endpoint, Identity, Uri, Certificate, Channel as InnerChannel, ClientTlsConfig, Endpoint, Identity, Uri,
}; };
@@ -40,48 +39,18 @@ lazy_static! {
static ref ID: AtomicU64 = AtomicU64::new(0); static ref ID: AtomicU64 = AtomicU64::new(0);
} }
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug)]
pub struct ChannelManager { pub struct ChannelManager {
inner: Arc<Inner>,
}
#[derive(Debug)]
struct Inner {
id: u64, id: u64,
config: ChannelConfig, config: ChannelConfig,
client_tls_config: Option<ClientTlsConfig>, client_tls_config: Option<ClientTlsConfig>,
pool: Arc<Pool>, pool: Arc<Pool>,
channel_recycle_started: AtomicBool, channel_recycle_started: Arc<AtomicBool>,
cancel: CancellationToken,
} }
impl Default for Inner { impl Default for ChannelManager {
fn default() -> Self { fn default() -> Self {
Self::with_config(ChannelConfig::default()) ChannelManager::with_config(ChannelConfig::default())
}
}
impl Drop for Inner {
fn drop(&mut self) {
// Cancel the channel recycle task.
self.cancel.cancel();
}
}
impl Inner {
fn with_config(config: ChannelConfig) -> Self {
let id = ID.fetch_add(1, Ordering::Relaxed);
let pool = Arc::new(Pool::default());
let cancel = CancellationToken::new();
Self {
id,
config,
client_tls_config: None,
pool,
channel_recycle_started: AtomicBool::new(false),
cancel,
}
} }
} }
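
The ownership restructuring in this file is easier to follow in isolation. Below is a minimal, self-contained sketch of the shape used on the `Arc<Inner>` side of this hunk: the shared state owns a `CancellationToken`, a background loop is spawned against a clone of it, and dropping the last handle cancels that loop. All names (`DemoInner`, `DemoManager`) are invented for the sketch, and it uses `tokio::spawn` directly instead of the crate's runtime helpers.

use std::sync::Arc;
use std::time::Duration;

use tokio_util::sync::CancellationToken;

struct DemoInner {
    cancel: CancellationToken,
}

impl Drop for DemoInner {
    fn drop(&mut self) {
        // Fires once the last Arc<DemoInner> is gone and stops the background task.
        self.cancel.cancel();
    }
}

#[derive(Clone)]
struct DemoManager {
    inner: Arc<DemoInner>,
}

impl DemoManager {
    // Must be called from within a Tokio runtime, since it spawns the loop.
    fn new() -> Self {
        let inner = Arc::new(DemoInner {
            cancel: CancellationToken::new(),
        });
        let cancel = inner.cancel.clone();
        tokio::spawn(async move {
            let mut interval = tokio::time::interval(Duration::from_secs(10));
            loop {
                tokio::select! {
                    _ = cancel.cancelled() => break,
                    _ = interval.tick() => {
                        // Periodic maintenance, e.g. recycling idle channels.
                    }
                }
            }
        });
        Self { inner }
    }
}

Cloning the manager stays cheap (a single `Arc`), and the maintenance task cannot outlive the last handle, which is what the `test_pool_release_*` tests further down assert.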
@@ -91,14 +60,19 @@ impl ChannelManager {
} }
pub fn with_config(config: ChannelConfig) -> Self { pub fn with_config(config: ChannelConfig) -> Self {
let inner = Inner::with_config(config); let id = ID.fetch_add(1, Ordering::Relaxed);
let pool = Arc::new(Pool::default());
Self { Self {
inner: Arc::new(inner), id,
config,
client_tls_config: None,
pool,
channel_recycle_started: Arc::new(AtomicBool::new(false)),
} }
} }
pub fn with_tls_config(config: ChannelConfig) -> Result<Self> { pub fn with_tls_config(config: ChannelConfig) -> Result<Self> {
let mut inner = Inner::with_config(config.clone()); let mut cm = Self::with_config(config.clone());
// setup tls // setup tls
let path_config = config.client_tls.context(InvalidTlsConfigSnafu { let path_config = config.client_tls.context(InvalidTlsConfigSnafu {
@@ -114,23 +88,17 @@ impl ChannelManager {
.context(InvalidConfigFilePathSnafu)?; .context(InvalidConfigFilePathSnafu)?;
let client_identity = Identity::from_pem(client_cert, client_key); let client_identity = Identity::from_pem(client_cert, client_key);
inner.client_tls_config = Some( cm.client_tls_config = Some(
ClientTlsConfig::new() ClientTlsConfig::new()
.ca_certificate(server_root_ca_cert) .ca_certificate(server_root_ca_cert)
.identity(client_identity), .identity(client_identity),
); );
Ok(Self { Ok(cm)
inner: Arc::new(inner),
})
} }
pub fn config(&self) -> &ChannelConfig { pub fn config(&self) -> &ChannelConfig {
&self.inner.config &self.config
}
fn pool(&self) -> &Arc<Pool> {
&self.inner.pool
} }
pub fn get(&self, addr: impl AsRef<str>) -> Result<InnerChannel> { pub fn get(&self, addr: impl AsRef<str>) -> Result<InnerChannel> {
@@ -138,12 +106,12 @@ impl ChannelManager {
let addr = addr.as_ref(); let addr = addr.as_ref();
// It will acquire the read lock. // It will acquire the read lock.
if let Some(inner_ch) = self.pool().get(addr) { if let Some(inner_ch) = self.pool.get(addr) {
return Ok(inner_ch); return Ok(inner_ch);
} }
// It will acquire the write lock. // It will acquire the write lock.
let entry = match self.pool().entry(addr.to_string()) { let entry = match self.pool.entry(addr.to_string()) {
Entry::Occupied(entry) => { Entry::Occupied(entry) => {
entry.get().increase_access(); entry.get().increase_access();
entry.into_ref() entry.into_ref()
@@ -182,7 +150,7 @@ impl ChannelManager {
access: AtomicUsize::new(1), access: AtomicUsize::new(1),
use_default_connector: false, use_default_connector: false,
}; };
self.pool().put(addr, channel); self.pool.put(addr, channel);
Ok(inner_channel) Ok(inner_channel)
} }
@@ -191,11 +159,11 @@ impl ChannelManager {
where where
F: FnMut(&String, &mut Channel) -> bool, F: FnMut(&String, &mut Channel) -> bool,
{ {
self.pool().retain_channel(f); self.pool.retain_channel(f);
} }
fn build_endpoint(&self, addr: &str) -> Result<Endpoint> { fn build_endpoint(&self, addr: &str) -> Result<Endpoint> {
let http_prefix = if self.inner.client_tls_config.is_some() { let http_prefix = if self.client_tls_config.is_some() {
"https" "https"
} else { } else {
"http" "http"
@@ -204,52 +172,51 @@ impl ChannelManager {
let mut endpoint = let mut endpoint =
Endpoint::new(format!("{http_prefix}://{addr}")).context(CreateChannelSnafu)?; Endpoint::new(format!("{http_prefix}://{addr}")).context(CreateChannelSnafu)?;
if let Some(dur) = self.config().timeout { if let Some(dur) = self.config.timeout {
endpoint = endpoint.timeout(dur); endpoint = endpoint.timeout(dur);
} }
if let Some(dur) = self.config().connect_timeout { if let Some(dur) = self.config.connect_timeout {
endpoint = endpoint.connect_timeout(dur); endpoint = endpoint.connect_timeout(dur);
} }
if let Some(limit) = self.config().concurrency_limit { if let Some(limit) = self.config.concurrency_limit {
endpoint = endpoint.concurrency_limit(limit); endpoint = endpoint.concurrency_limit(limit);
} }
if let Some((limit, dur)) = self.config().rate_limit { if let Some((limit, dur)) = self.config.rate_limit {
endpoint = endpoint.rate_limit(limit, dur); endpoint = endpoint.rate_limit(limit, dur);
} }
if let Some(size) = self.config().initial_stream_window_size { if let Some(size) = self.config.initial_stream_window_size {
endpoint = endpoint.initial_stream_window_size(size); endpoint = endpoint.initial_stream_window_size(size);
} }
if let Some(size) = self.config().initial_connection_window_size { if let Some(size) = self.config.initial_connection_window_size {
endpoint = endpoint.initial_connection_window_size(size); endpoint = endpoint.initial_connection_window_size(size);
} }
if let Some(dur) = self.config().http2_keep_alive_interval { if let Some(dur) = self.config.http2_keep_alive_interval {
endpoint = endpoint.http2_keep_alive_interval(dur); endpoint = endpoint.http2_keep_alive_interval(dur);
} }
if let Some(dur) = self.config().http2_keep_alive_timeout { if let Some(dur) = self.config.http2_keep_alive_timeout {
endpoint = endpoint.keep_alive_timeout(dur); endpoint = endpoint.keep_alive_timeout(dur);
} }
if let Some(enabled) = self.config().http2_keep_alive_while_idle { if let Some(enabled) = self.config.http2_keep_alive_while_idle {
endpoint = endpoint.keep_alive_while_idle(enabled); endpoint = endpoint.keep_alive_while_idle(enabled);
} }
if let Some(enabled) = self.config().http2_adaptive_window { if let Some(enabled) = self.config.http2_adaptive_window {
endpoint = endpoint.http2_adaptive_window(enabled); endpoint = endpoint.http2_adaptive_window(enabled);
} }
if let Some(tls_config) = &self.inner.client_tls_config { if let Some(tls_config) = &self.client_tls_config {
endpoint = endpoint endpoint = endpoint
.tls_config(tls_config.clone()) .tls_config(tls_config.clone())
.context(CreateChannelSnafu)?; .context(CreateChannelSnafu)?;
} }
endpoint = endpoint endpoint = endpoint
.tcp_keepalive(self.config().tcp_keepalive) .tcp_keepalive(self.config.tcp_keepalive)
.tcp_nodelay(self.config().tcp_nodelay); .tcp_nodelay(self.config.tcp_nodelay);
Ok(endpoint) Ok(endpoint)
} }
fn trigger_channel_recycling(&self) { fn trigger_channel_recycling(&self) {
if self if self
.inner
.channel_recycle_started .channel_recycle_started
.compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed) .compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed)
.is_err() .is_err()
@@ -257,15 +224,13 @@ impl ChannelManager {
return; return;
} }
let pool = self.pool().clone(); let pool = self.pool.clone();
let cancel = self.inner.cancel.clone(); let _handle = common_runtime::spawn_global(async {
let id = self.inner.id; recycle_channel_in_loop(pool, RECYCLE_CHANNEL_INTERVAL_SECS).await;
let _handle = common_runtime::spawn_global(async move {
recycle_channel_in_loop(pool, id, cancel, RECYCLE_CHANNEL_INTERVAL_SECS).await;
}); });
info!( info!(
"ChannelManager: {}, channel recycle is started, running in the background!", "ChannelManager: {}, channel recycle is started, running in the background!",
self.inner.id self.id
); );
} }
} }
@@ -478,23 +443,11 @@ impl Pool {
} }
} }
async fn recycle_channel_in_loop( async fn recycle_channel_in_loop(pool: Arc<Pool>, interval_secs: u64) {
pool: Arc<Pool>,
id: u64,
cancel: CancellationToken,
interval_secs: u64,
) {
let mut interval = tokio::time::interval(Duration::from_secs(interval_secs)); let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
loop { loop {
tokio::select! { let _ = interval.tick().await;
_ = cancel.cancelled() => {
info!("Stop channel recycle, ChannelManager id: {}", id);
break;
},
_ = interval.tick() => {}
}
pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0) pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
} }
} }
@@ -508,7 +461,11 @@ mod tests {
#[should_panic] #[should_panic]
#[test] #[test]
fn test_invalid_addr() { fn test_invalid_addr() {
let mgr = ChannelManager::default(); let pool = Arc::new(Pool::default());
let mgr = ChannelManager {
pool,
..Default::default()
};
let addr = "http://test"; let addr = "http://test";
let _ = mgr.get(addr).unwrap(); let _ = mgr.get(addr).unwrap();
@@ -518,9 +475,7 @@ mod tests {
async fn test_access_count() { async fn test_access_count() {
let mgr = ChannelManager::new(); let mgr = ChannelManager::new();
// Do not start recycle // Do not start recycle
mgr.inner mgr.channel_recycle_started.store(true, Ordering::Relaxed);
.channel_recycle_started
.store(true, Ordering::Relaxed);
let mgr = Arc::new(mgr); let mgr = Arc::new(mgr);
let addr = "test_uri"; let addr = "test_uri";
@@ -538,12 +493,12 @@ mod tests {
join.await.unwrap(); join.await.unwrap();
} }
assert_eq!(1000, mgr.pool().get_access(addr).unwrap()); assert_eq!(1000, mgr.pool.get_access(addr).unwrap());
mgr.pool() mgr.pool
.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0); .retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0);
assert_eq!(0, mgr.pool().get_access(addr).unwrap()); assert_eq!(0, mgr.pool.get_access(addr).unwrap());
} }
#[test] #[test]
@@ -669,49 +624,4 @@ mod tests {
true true
}); });
} }
#[tokio::test]
async fn test_pool_release_with_channel_recycle() {
let mgr = ChannelManager::new();
let pool_holder = mgr.pool().clone();
// start channel recycle task
let addr = "test_addr";
let _ = mgr.get(addr);
let mgr_clone_1 = mgr.clone();
let mgr_clone_2 = mgr.clone();
assert_eq!(3, Arc::strong_count(mgr.pool()));
drop(mgr_clone_1);
drop(mgr_clone_2);
assert_eq!(3, Arc::strong_count(mgr.pool()));
drop(mgr);
// wait for the channel recycle task to finish
tokio::time::sleep(Duration::from_millis(10)).await;
assert_eq!(1, Arc::strong_count(&pool_holder));
}
#[tokio::test]
async fn test_pool_release_without_channel_recycle() {
let mgr = ChannelManager::new();
let pool_holder = mgr.pool().clone();
let mgr_clone_1 = mgr.clone();
let mgr_clone_2 = mgr.clone();
assert_eq!(2, Arc::strong_count(mgr.pool()));
drop(mgr_clone_1);
drop(mgr_clone_2);
assert_eq!(2, Arc::strong_count(mgr.pool()));
drop(mgr);
assert_eq!(1, Arc::strong_count(&pool_holder));
}
} }


@@ -97,14 +97,6 @@ pub enum Error {
#[snafu(display("Not supported: {}", feat))] #[snafu(display("Not supported: {}", feat))]
NotSupported { feat: String }, NotSupported { feat: String },
#[snafu(display("Failed to serde Json"))]
SerdeJson {
#[snafu(source)]
error: serde_json::error::Error,
#[snafu(implicit)]
location: Location,
},
} }
impl ErrorExt for Error { impl ErrorExt for Error {
@@ -118,8 +110,7 @@ impl ErrorExt for Error {
Error::CreateChannel { .. } Error::CreateChannel { .. }
| Error::Conversion { .. } | Error::Conversion { .. }
| Error::DecodeFlightData { .. } | Error::DecodeFlightData { .. } => StatusCode::Internal,
| Error::SerdeJson { .. } => StatusCode::Internal,
Error::CreateRecordBatch { source, .. } => source.status_code(), Error::CreateRecordBatch { source, .. } => source.status_code(),
Error::ConvertArrowSchema { source, .. } => source.status_code(), Error::ConvertArrowSchema { source, .. } => source.status_code(),


@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
pub mod do_put;
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;

Some files were not shown because too many files have changed in this diff.