Merge branch 'main' into prom-plan-commutativity

2026-01-05 21:02:58 +00:00 · 2025-04-27 17:40:24 +08:00
parent 0f521956bf 3c943be189
commit 99b352cea1
445 changed files with 32973 additions and 15741 deletions
--- a/.coderabbit.yaml
+++ b/.coderabbit.yaml
@@ -1,15 +0,0 @@
-# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
-language: "en-US"
-early_access: false
-reviews:
-  profile: "chill"
-  request_changes_workflow: false
-  high_level_summary: true
-  poem: true
-  review_status: true
-  collapse_walkthrough: false
-  auto_review:
-    enabled: false
-    drafts: false
-chat:
-  auto_reply: true
--- a/.github/actions/setup-greptimedb-cluster/with-remote-wal.yaml
+++ b/.github/actions/setup-greptimedb-cluster/with-remote-wal.yaml
@@ -2,13 +2,14 @@ meta:
  configData: |-
    [runtime]
    global_rt_size = 4
-    
+
    [wal]
    provider = "kafka"
    broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
    num_topics = 3
+    auto_prune_interval = "30s"
+    trigger_flush_threshold = 100

-        
    [datanode]
    [datanode.client]
    timeout = "120s"
@@ -22,6 +23,7 @@ datanode:
    provider = "kafka"
    broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
    linger = "2ms"
+    overwrite_entry_start_id = true
 frontend:
  configData: |-
    [runtime]
--- a/.github/scripts/create-version.sh
+++ b/.github/scripts/create-version.sh
@@ -10,22 +10,22 @@ set -e
 function create_version() {
  # Read from envrionment variables.
  if [ -z "$GITHUB_EVENT_NAME" ]; then
-      echo "GITHUB_EVENT_NAME is empty"
+      echo "GITHUB_EVENT_NAME is empty" >&2
      exit 1
  fi

  if [ -z "$NEXT_RELEASE_VERSION" ]; then
-      echo "NEXT_RELEASE_VERSION is empty"
-      exit 1
+      echo "NEXT_RELEASE_VERSION is empty, use version from Cargo.toml" >&2
+      export NEXT_RELEASE_VERSION=$(grep '^version = ' Cargo.toml | cut -d '"' -f 2 | head -n 1)
  fi

  if [ -z "$NIGHTLY_RELEASE_PREFIX" ]; then
-      echo "NIGHTLY_RELEASE_PREFIX is empty"
+      echo "NIGHTLY_RELEASE_PREFIX is empty" >&2
      exit 1
  fi

  # Reuse $NEXT_RELEASE_VERSION to identify whether it's a nightly build.
-  # It will be like 'nigtly-20230808-7d0d8dc6'.
+  # It will be like 'nightly-20230808-7d0d8dc6'.
  if [ "$NEXT_RELEASE_VERSION" = nightly ]; then
    echo "$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")-$(git rev-parse --short HEAD)"
    exit 0
@@ -35,7 +35,7 @@ function create_version() {
  # It will be like 'dev-2023080819-f0e7216c'.
  if [ "$NEXT_RELEASE_VERSION" = dev ]; then
    if [ -z "$COMMIT_SHA" ]; then
-      echo "COMMIT_SHA is empty in dev build"
+      echo "COMMIT_SHA is empty in dev build" >&2
      exit 1
    fi
    echo "dev-$(date "+%Y%m%d-%s")-$(echo "$COMMIT_SHA" | cut -c1-8)"
@@ -45,7 +45,7 @@ function create_version() {
  # Note: Only output 'version=xxx' to stdout when everything is ok, so that it can be used in GitHub Actions Outputs.
  if [ "$GITHUB_EVENT_NAME" = push ]; then
    if [ -z "$GITHUB_REF_NAME" ]; then
-      echo "GITHUB_REF_NAME is empty in push event"
+      echo "GITHUB_REF_NAME is empty in push event" >&2
      exit 1
    fi
    echo "$GITHUB_REF_NAME"
@@ -54,15 +54,15 @@ function create_version() {
  elif [ "$GITHUB_EVENT_NAME" = schedule ]; then
    echo "$NEXT_RELEASE_VERSION-$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")"
  else
-    echo "Unsupported GITHUB_EVENT_NAME: $GITHUB_EVENT_NAME"
+    echo "Unsupported GITHUB_EVENT_NAME: $GITHUB_EVENT_NAME" >&2
    exit 1
  fi
 }

 # You can run as following examples:
-#  GITHUB_EVENT_NAME=push NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly GITHUB_REF_NAME=v0.3.0 ./create-version.sh
-#  GITHUB_EVENT_NAME=workflow_dispatch NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
-#  GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
-#  GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=nightly NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
-#  GITHUB_EVENT_NAME=workflow_dispatch COMMIT_SHA=f0e7216c4bb6acce9b29a21ec2d683be2e3f984a NEXT_RELEASE_VERSION=dev NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh
+#  GITHUB_EVENT_NAME=push NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly GITHUB_REF_NAME=v0.3.0 ./create-version.sh
+#  GITHUB_EVENT_NAME=workflow_dispatch NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
+#  GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
+#  GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=nightly NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
+#  GITHUB_EVENT_NAME=workflow_dispatch COMMIT_SHA=f0e7216c4bb6acce9b29a21ec2d683be2e3f984a NEXT_RELEASE_VERSION=dev NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
 create_version
--- a/.github/workflows/grafana.yml
+++ b/.github/workflows/grafana.yml
@@ -21,32 +21,6 @@ jobs:
        run: sudo apt-get install -y jq

      # Make the check.sh script executable
-      - name: Make check.sh executable
-        run: chmod +x grafana/check.sh
-
-      # Run the check.sh script
-      - name: Run check.sh
-        run: ./grafana/check.sh
-
-      # Only run summary.sh for pull_request events (not for merge queues or final pushes)
-      - name: Check if this is a pull request
-        id: check-pr
+      - name: Check grafana dashboards
        run: |
-          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
-            echo "is_pull_request=true" >> $GITHUB_OUTPUT
-          else
-            echo "is_pull_request=false" >> $GITHUB_OUTPUT
-          fi
-
-      # Make the summary.sh script executable
-      - name: Make summary.sh executable
-        if: steps.check-pr.outputs.is_pull_request == 'true'
-        run: chmod +x grafana/summary.sh
-
-      # Run the summary.sh script and add its output to the GitHub Job Summary
-      - name: Run summary.sh and add to Job Summary
-        if: steps.check-pr.outputs.is_pull_request == 'true'
-        run: |
-          SUMMARY=$(./grafana/summary.sh)
-          echo "### Summary of Grafana Panels" >> $GITHUB_STEP_SUMMARY
-          echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY
+          make check-dashboards
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -90,8 +90,6 @@ env:

  # The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
  NIGHTLY_RELEASE_PREFIX: nightly
-  # Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
-  NEXT_RELEASE_VERSION: v0.14.0

 jobs:
  allocate-runners:
@@ -135,7 +133,6 @@ jobs:
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_REF_NAME: ${{ github.ref_name }}
-          NEXT_RELEASE_VERSION: ${{ env.NEXT_RELEASE_VERSION }}
          NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }}

      - name: Allocate linux-amd64 runner
@@ -317,7 +314,7 @@ jobs:
          image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
          image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
          version: ${{ needs.allocate-runners.outputs.version }}
-          push-latest-tag: true
+          push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}

      - name: Set build image result
        id: set-build-image-result
@@ -364,7 +361,7 @@ jobs:
          dev-mode: false
          upload-to-s3: true
          update-version-info: true
-          push-latest-tag: true
+          push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}

  publish-github-release:
    name: Create GitHub release and upload artifacts
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -68,16 +68,16 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "0.14.0"
+version = "0.15.0"
 edition = "2021"
 license = "Apache-2.0"

 [workspace.lints]
-clippy.print_stdout = "warn"
-clippy.print_stderr = "warn"
 clippy.dbg_macro = "warn"
 clippy.implicit_clone = "warn"
-clippy.readonly_write_lock = "allow"
+clippy.result_large_err = "allow"
+clippy.large_enum_variant = "allow"
+clippy.doc_overindented_list_items = "allow"
 rust.unknown_lints = "deny"
 rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }

@@ -113,15 +113,15 @@ clap = { version = "4.4", features = ["derive"] }
 config = "0.13.0"
 crossbeam-utils = "0.8"
 dashmap = "6.1"
-datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "07dd0bee9e524d83228847c15af6c12f438349ab" }
-datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "07dd0bee9e524d83228847c15af6c12f438349ab" }
-datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "07dd0bee9e524d83228847c15af6c12f438349ab" }
-datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "07dd0bee9e524d83228847c15af6c12f438349ab" }
-datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "07dd0bee9e524d83228847c15af6c12f438349ab" }
-datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "07dd0bee9e524d83228847c15af6c12f438349ab" }
-datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "07dd0bee9e524d83228847c15af6c12f438349ab" }
-datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "07dd0bee9e524d83228847c15af6c12f438349ab" }
-datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "07dd0bee9e524d83228847c15af6c12f438349ab" }
+datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
+datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
+datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
+datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
+datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
+datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
+datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
+datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
+datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
 deadpool = "0.12"
 deadpool-postgres = "0.14"
 derive_builder = "0.20"
@@ -130,7 +130,7 @@ etcd-client = "0.14"
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "dd4a1996982534636734674db66e44464b0c0d83" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "e82b0158cd38d4021edb4e4c0ae77f999051e62f" }
 hex = "0.4"
 http = "1"
 humantime = "2.1"
@@ -148,7 +148,7 @@ moka = "0.12"
 nalgebra = "0.33"
 notify = "8.0"
 num_cpus = "1.16"
-object_store_opendal = "0.49.0"
+object_store_opendal = "0.50"
 once_cell = "1.18"
 opentelemetry-proto = { version = "0.27", features = [
    "gen-tonic",
@@ -162,7 +162,7 @@ parquet = { version = "54.2", default-features = false, features = ["arrow", "as
 paste = "1.0"
 pin-project = "1.0"
 prometheus = { version = "0.13.3", features = ["process"] }
-promql-parser = { version = "0.5", features = ["ser"] }
+promql-parser = { version = "0.5.1", features = ["ser"] }
 prost = "0.13"
 raft-engine = { version = "0.4.1", default-features = false }
 rand = "0.9"
@@ -192,7 +192,7 @@ simd-json = "0.15"
 similar-asserts = "1.6.0"
 smallvec = { version = "1", features = ["serde"] }
 snafu = "0.8"
-sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "e98e6b322426a9d397a71efef17075966223c089", features = [
+sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "0cf6c04490d59435ee965edd2078e8855bd8471e", features = [
    "visitor",
    "serde",
 ] } # branch = "v0.54.x"
@@ -270,6 +270,9 @@ metric-engine = { path = "src/metric-engine" }
 mito2 = { path = "src/mito2" }
 object-store = { path = "src/object-store" }
 operator = { path = "src/operator" }
+otel-arrow-rust = { git = "https://github.com/open-telemetry/otel-arrow", rev = "5d551412d2a12e689cde4d84c14ef29e36784e51", features = [
+    "server",
+] }
 partition = { path = "src/partition" }
 pipeline = { path = "src/pipeline" }
 plugins = { path = "src/plugins" }
--- a/Makefile
+++ b/Makefile
@@ -32,6 +32,10 @@ ifneq ($(strip $(BUILD_JOBS)),)
 	NEXTEST_OPTS += --build-jobs=${BUILD_JOBS}
 endif

+ifneq ($(strip $(BUILD_JOBS)),)
+	SQLNESS_OPTS += --jobs ${BUILD_JOBS}
+endif
+
 ifneq ($(strip $(CARGO_PROFILE)),)
 	CARGO_BUILD_OPTS += --profile ${CARGO_PROFILE}
 endif
@@ -218,6 +222,16 @@ start-cluster: ## Start the greptimedb cluster with etcd by using docker compose
 stop-cluster: ## Stop the greptimedb cluster that created by docker compose.
 	docker compose -f ./docker/docker-compose/cluster-with-etcd.yaml stop

+##@ Grafana
+
+.PHONY: check-dashboards
+check-dashboards: ## Check the Grafana dashboards.
+	@./grafana/scripts/check.sh
+
+.PHONY: dashboards
+dashboards: ## Generate the Grafana dashboards for standalone mode and intermediate dashboards.
+	@./grafana/scripts/gen-dashboards.sh
+
 ##@ Docs
 config-docs: ## Generate configuration documentation from toml files.
 	docker run --rm \
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
  </picture>
 </p>

-<h2 align="center">Unified & Cost-Effective Observerability Database for Metrics, Logs, and Events</h2>
+<h2 align="center">Real-Time & Cloud-Native Observability Database<br/>for metrics, logs, and traces</h2>

 <div align="center">
 <h3 align="center">
@@ -62,7 +62,7 @@

 ## Introduction

-**GreptimeDB** is an open-source unified & cost-effective observerability database for **Metrics**, **Logs**, and **Events** (also **Traces** in plan). You can gain real-time insights from Edge to Cloud at Any Scale.
+**GreptimeDB** is an open-source, cloud-native, unified & cost-effective observability database for **Metrics**, **Logs**, and **Traces**. You can gain real-time insights from Edge to Cloud at Any Scale.

 ## News

@@ -70,27 +70,27 @@

 ## Why GreptimeDB

-Our core developers have been building observerability data platforms for years. Based on our best practices, GreptimeDB was born to give you:
+Our core developers have been building observability data platforms for years. Based on our best practices, GreptimeDB was born to give you:

-* **Unified Processing of Metrics, Logs, and Events**
+* **Unified Processing of Observability Data**

-  GreptimeDB unifies observerability data processing by treating all data - whether metrics, logs, or events - as timestamped events with context. Users can analyze this data using either [SQL](https://docs.greptime.com/user-guide/query-data/sql) or [PromQL](https://docs.greptime.com/user-guide/query-data/promql) and leverage stream processing ([Flow](https://docs.greptime.com/user-guide/flow-computation/overview)) to enable continuous aggregation. [Read more](https://docs.greptime.com/user-guide/concepts/data-model).
+  A unified database that treats metrics, logs, and traces as timestamped wide events with context, supporting [SQL](https://docs.greptime.com/user-guide/query-data/sql)/[PromQL](https://docs.greptime.com/user-guide/query-data/promql) queries and [stream processing](https://docs.greptime.com/user-guide/flow-computation/overview) to simplify complex data stacks.
+
+* **High Performance and Cost-effective**
+
+  Written in Rust, GreptimeDB combines a distributed query engine with [rich indexing](https://docs.greptime.com/user-guide/manage-data/data-index) (inverted, fulltext, skip data, and vector) and optimized columnar storage to deliver sub-second responses on petabyte-scale data with high cost efficiency.

 * **Cloud-native Distributed Database**

  Built for [Kubernetes](https://docs.greptime.com/user-guide/deployments/deploy-on-kubernetes/greptimedb-operator-management). GreptimeDB achieves seamless scalability with its [cloud-native architecture](https://docs.greptime.com/user-guide/concepts/architecture) of separated compute and storage, built on object storage (AWS S3, Azure Blob Storage, etc.) while enabling cross-cloud deployment through a unified data access layer.

-* **Performance and Cost-effective**
+* **Developer-Friendly**

-  Written in pure Rust for superior performance and reliability. GreptimeDB features a distributed query engine with intelligent indexing to handle high cardinality data efficiently. Its optimized columnar storage achieves 50x cost efficiency on cloud object storage through advanced compression. [Benchmark reports](https://www.greptime.com/blogs/2024-09-09-report-summary).
+  Access standardized SQL/PromQL interfaces through built-in web dashboard, REST API, and MySQL/PostgreSQL protocols. Supports widely adopted data ingestion [protocols](https://docs.greptime.com/user-guide/protocols/overview) for seamless migration and integration.

-* **Cloud-Edge Collaboration**
+* **Flexible Deployment Options**

-  GreptimeDB seamlessly operates across cloud and edge (ARM/Android/Linux), providing consistent APIs and control plane for unified data management and efficient synchronization. [Learn how to run on Android](https://docs.greptime.com/user-guide/deployments/run-on-android/).
-
-* **Multi-protocol Ingestion, SQL & PromQL Ready**
-
-  Widely adopted database protocols and APIs, including MySQL, PostgreSQL, InfluxDB, OpenTelemetry, Loki and Prometheus, etc.  Effortless Adoption & Seamless Migration. [Supported Protocols Overview](https://docs.greptime.com/user-guide/protocols/overview).
+  Deploy GreptimeDB anywhere from ARM-based edge devices to cloud environments with unified APIs and bandwidth-efficient data synchronization. Query edge and cloud data seamlessly through identical APIs. [Learn how to run on Android](https://docs.greptime.com/user-guide/deployments/run-on-android/).

 For more detailed info please read  [Why GreptimeDB](https://docs.greptime.com/user-guide/concepts/why-greptimedb).

@@ -233,3 +233,5 @@ Special thanks to all the contributors who have propelled GreptimeDB forward. Fo
 - GreptimeDB's query engine is powered by [Apache Arrow DataFusion™](https://arrow.apache.org/datafusion/).
 - [Apache OpenDAL™](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer.
 - GreptimeDB's meta service is based on [etcd](https://etcd.io/).
+
+<img alt="Known Users" src="https://greptime.com/logo/img/users.png"/>
--- a/config/config.md
+++ b/config/config.md
@@ -96,6 +96,8 @@
 | `procedure.max_running_procedures` | Integer | `128` | Max running procedures.<br/>The maximum number of procedures that can be running at the same time.<br/>If the number of running procedures exceeds this limit, the procedure will be rejected. |
 | `flow` | -- | -- | flow engine options. |
 | `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. |
+| `query` | -- | -- | The query engine options. |
+| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
 | `storage` | -- | -- | The data storage options. |
 | `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
 | `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
@@ -270,6 +272,8 @@
 | `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. |
 | `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. |
 | `meta_client.metadata_cache_tti` | String | `5m` | -- |
+| `query` | -- | -- | The query engine options. |
+| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
 | `datanode` | -- | -- | Datanode options. |
 | `datanode.client` | -- | -- | Datanode client options. |
 | `datanode.client.connect_timeout` | String | `10s` | -- |
@@ -315,6 +319,7 @@
 | `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
 | `use_memory_store` | Bool | `false` | Store data in memory. |
 | `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
+| `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.<br/>**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
 | `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
 | `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
 | `runtime` | -- | -- | The runtime options. |
@@ -339,6 +344,9 @@
 | `wal.provider` | String | `raft_engine` | -- |
 | `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. |
 | `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
+| `wal.auto_prune_interval` | String | `0s` | Interval of automatically WAL pruning.<br/>Set to `0s` to disable automatically WAL pruning which delete unused remote WAL entries periodically. |
+| `wal.trigger_flush_threshold` | Integer | `0` | The threshold to trigger a flush operation of a region in automatically WAL pruning.<br/>Metasrv will send a flush request to flush the region when:<br/>`trigger_flush_threshold` + `prunable_entry_id` < `max_prunable_entry_id`<br/>where:<br/>- `prunable_entry_id` is the maximum entry id that can be pruned of the region.<br/>- `max_prunable_entry_id` is the maximum prunable entry id among all regions in the same topic.<br/>Set to `0` to disable the flush operation. |
+| `wal.auto_prune_parallelism` | Integer | `10` | Concurrent task limit for automatically WAL pruning. |
 | `wal.num_topics` | Integer | `64` | Number of topics. |
 | `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) |
 | `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>Only accepts strings that match the following regular expression pattern:<br/>[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*<br/>i.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1. |
@@ -429,6 +437,8 @@
 | `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
+| `query` | -- | -- | The query engine options. |
+| `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
 | `storage` | -- | -- | The data storage options. |
 | `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
 | `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -243,6 +243,12 @@ overwrite_entry_start_id = false
 # credential = "base64-credential"
 # endpoint = "https://storage.googleapis.com"

+## The query engine options.
+[query]
+## Parallelism of the query engine.
+## Default to 0, which means the number of CPU cores.
+parallelism = 0
+
 ## The data storage options.
 [storage]
 ## The working home directory.
--- a/config/frontend.example.toml
+++ b/config/frontend.example.toml
@@ -179,6 +179,12 @@ metadata_cache_ttl = "10m"
 # TTI of the metadata cache.
 metadata_cache_tti = "5m"

+## The query engine options.
+[query]
+## Parallelism of the query engine.
+## Default to 0, which means the number of CPU cores.
+parallelism = 0
+
 ## Datanode options.
 [datanode]
 ## Datanode client options.
--- a/config/metasrv.example.toml
+++ b/config/metasrv.example.toml
@@ -50,6 +50,10 @@ use_memory_store = false
 ## - Using shared storage (e.g., s3).
 enable_region_failover = false

+## Whether to allow region failover on local WAL.
+## **This option is not recommended to be set to true, because it may lead to data loss during failover.**
+allow_region_failover_on_local_wal = false
+
 ## Max allowed idle time before removing node info from metasrv memory.
 node_max_idle_time = "24hours"

@@ -130,6 +134,22 @@ broker_endpoints = ["127.0.0.1:9092"]
 ## Otherwise, use topics named `topic_name_prefix_[0..num_topics)`
 auto_create_topics = true

+## Interval of automatically WAL pruning.
+## Set to `0s` to disable automatically WAL pruning which delete unused remote WAL entries periodically.
+auto_prune_interval = "0s"
+
+## The threshold to trigger a flush operation of a region in automatically WAL pruning.
+## Metasrv will send a flush request to flush the region when:
+## `trigger_flush_threshold` + `prunable_entry_id` < `max_prunable_entry_id`
+## where:
+## - `prunable_entry_id` is the maximum entry id that can be pruned of the region.
+## - `max_prunable_entry_id` is the maximum prunable entry id among all regions in the same topic.
+## Set to `0` to disable the flush operation.
+trigger_flush_threshold = 0
+
+## Concurrent task limit for automatically WAL pruning.
+auto_prune_parallelism = 10
+
 ## Number of topics.
 num_topics = 64

--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -334,6 +334,12 @@ max_running_procedures = 128
 # credential = "base64-credential"
 # endpoint = "https://storage.googleapis.com"

+## The query engine options.
+[query]
+## Parallelism of the query engine.
+## Default to 0, which means the number of CPU cores.
+parallelism = 0
+
 ## The data storage options.
 [storage]
 ## The working home directory.
--- a/docs/how-to/how-to-profile-memory.md
+++ b/docs/how-to/how-to-profile-memory.md
@@ -1,6 +1,6 @@
 # Profile memory usage of GreptimeDB

-This crate provides an easy approach to dump memory profiling info.
+This crate provides an easy approach to dump memory profiling info. A set of ready-to-use scripts is provided in [docs/how-to/memory-profile-scripts](./memory-profile-scripts).

 ## Prerequisites
 ### jemalloc
--- a/docs/how-to/memory-profile-scripts/scripts/README.md
+++ b/docs/how-to/memory-profile-scripts/scripts/README.md
@@ -0,0 +1,52 @@
+# Memory Analysis Process
+This section will guide you through the process of analyzing memory usage for greptimedb.
+
+1. Get the `jeprof` tool script, see the next section("Getting the `jeprof` tool") for details.
+
+2. After starting `greptimedb`(with env var `MALLOC_CONF=prof:true`), execute the `dump.sh` script with the PID of the `greptimedb` process as an argument. This continuously monitors memory usage and captures profiles when exceeding thresholds (e.g. +20MB within 10 minutes). Outputs `greptime-{timestamp}.gprof` files.
+
+3. With 2-3 gprof files, run `gen_flamegraph.sh` in the same environment to generate flame graphs showing memory allocation call stacks.
+
+4.  **NOTE:** The `gen_flamegraph.sh` script requires `jeprof` and optionally `flamegraph.pl` to be in the current directory. If you want to generate flame graphs right away, run the `get_flamegraph_tool.sh` script, which downloads the flame graph generation tool `flamegraph.pl` to the current directory.
+    The usage of `gen_flamegraph.sh` is:
+
+    `Usage: ./gen_flamegraph.sh <binary_path> <gprof_directory>`
+    where `<binary_path>` is the path to the greptimedb binary, `<gprof_directory>` is the directory containing the gprof files(the directory `dump.sh` is dumping profiles to).
+    Example call: `./gen_flamegraph.sh ./greptime .`
+
+    Generating the flame graph might take a few minutes. The generated flame graphs are located in the `<gprof_directory>/flamegraphs` directory. If no `flamegraph.pl` is found, the directory will only contain `.collapse` files, which are also fine.
+5.  You can send the generated flame graphs(the entire folder of `<gprof_directory>/flamegraphs`) to developers for further analysis.
+
+
+## Getting the `jeprof` tool
+There are three ways to get `jeprof`, listed here from simplest to most complex. Any one of them is fine, as long as it is run in the same environment that `greptimedb` will be running in:
+1. If you are compiling greptimedb from source, then `jeprof` is already produced during compilation. After running `cargo build`, execute `find_compiled_jeprof.sh`. This will copy `jeprof` to the current directory.
+2. Or, if you have the Rust toolchain installed locally, simply follow these commands:
+```bash
+cargo new get_jeprof
+cd get_jeprof
+```
+Then add this line to `Cargo.toml`:
+```toml
+[dependencies]
+tikv-jemalloc-ctl = { version = "0.6", features = ["use_std", "stats"] }
+```
+then run:
+```bash
+cargo build
+```
+After that, the `jeprof` tool is produced. Now run `find_compiled_jeprof.sh` in the current directory; it will copy the `jeprof` tool to the current directory.
+
+3. Compile jemalloc from source.
+You can first clone this repo and check out this commit:
+```bash
+git clone https://github.com/tikv/jemalloc.git
+cd jemalloc
+git checkout e13ca993e8ccb9ba9847cc330696e02839f328f7
+```
+then run:
+```bash
+./configure
+make
+```
+and `jeprof` is in the `./bin/` directory. Copy it to the current directory.
--- a/docs/how-to/memory-profile-scripts/scripts/dump.sh
+++ b/docs/how-to/memory-profile-scripts/scripts/dump.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Monitors greptime process memory usage every 10 minutes
+# Triggers memory profile capture via `curl -X POST localhost:4000/debug/prof/mem > greptime-{timestamp}.gprof`
+# when memory increases by more than 20MB since last check
+# Generated profiles can be analyzed using flame graphs as described in `how-to-profile-memory.md`
+# (jeprof is compiled with the database - see documentation)
+# Alternative: Share binaries + profiles for analysis (Docker images preferred)
+
+# Threshold in Kilobytes (20 MB)
+threshold_kb=$((20 * 1024))
+sleep_interval=$((10 * 60))
+
+# Variable to store the last measured memory usage in KB
+last_mem_kb=0
+
+# Check if PID is provided as an argument (validated once, before the monitoring loop starts)
+if [ -z "$1" ]; then
+    echo "$(date): PID must be provided as a command-line argument."
+    exit 1
+fi
+
+pid="$1"
+
+# Validate that the PID is a number
+if ! [[ "$pid" =~ ^[0-9]+$ ]]; then
+    echo "$(date): Invalid PID: '$pid'. PID must be a number."
+    exit 1
+fi
+
+echo "Starting memory monitoring for 'greptime' process..."
+
+while true; do
+    # Get the current Resident Set Size (RSS) in Kilobytes; ps may pad the value with spaces, so strip them
+    current_mem_kb=$(ps -o rss= -p "$pid" | tr -d '[:space:]')
+
+    # Check if ps command was successful and returned a number
+    if ! [[ "$current_mem_kb" =~ ^[0-9]+$ ]]; then
+        echo "$(date): Failed to get memory usage for PID $pid. Skipping check."
+        # Keep last_mem_kb to avoid false positives if the process briefly becomes unreadable.
+        # Sleep before retrying so that a vanished process does not turn this loop into a busy loop.
+        sleep "$sleep_interval"
+        continue
+    fi
+
+    echo "$(date): Current memory usage for PID $pid: ${current_mem_kb} KB"
+
+    # Compare with the last measurement.
+    # last_mem_kb starts at 0, so the first check normally exceeds the threshold and
+    # produces a baseline dump, which also makes sure that dumping works at all.
+    diff_kb=$((current_mem_kb - last_mem_kb))
+    echo "$(date): Memory usage change since last check: ${diff_kb} KB"
+
+    if [ "$diff_kb" -gt "$threshold_kb" ]; then
+        echo "$(date): Memory increase (${diff_kb} KB) exceeded threshold (${threshold_kb} KB). Dumping profile..."
+        timestamp=$(date +%Y%m%d%H%M%S)
+        profile_file="greptime-${timestamp}.gprof"
+        # Execute curl and capture output to file
+        if curl -sf -X POST localhost:4000/debug/prof/mem > "$profile_file"; then
+            echo "$(date): Memory profile saved to $profile_file"
+        else
+            echo "$(date): Failed to dump memory profile (curl exit code: $?)."
+            # Remove the potentially empty/failed profile file
+            rm -f "$profile_file"
+        fi
+    else
+        echo "$(date): Memory increase (${diff_kb} KB) is within the threshold (${threshold_kb} KB)."
+    fi
+
+    # Update the last memory usage
+    last_mem_kb=$current_mem_kb
+
+    # Wait for the configured interval (sleep_interval seconds) before the next check
+    echo "$(date): Sleeping for $sleep_interval seconds..."
+    sleep "$sleep_interval"
+done
+
+echo "Memory monitoring script stopped." # This line might not be reached in normal operation
--- a/docs/how-to/memory-profile-scripts/scripts/find_compiled_jeprof.sh
+++ b/docs/how-to/memory-profile-scripts/scripts/find_compiled_jeprof.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# Locates compiled jeprof binary (memory analysis tool) after cargo build
+# Copies it to current directory from target/ build directories
+
+JPROF_PATH=$(find . -type f -name 'jeprof' -print -quit)
+if [ -n "$JPROF_PATH" ]; then
+  echo "Found jeprof at $JPROF_PATH"
+  [ "$JPROF_PATH" -ef ./jeprof ] || cp "$JPROF_PATH" . # nothing to copy when the match already is ./jeprof
+  chmod +x jeprof
+  echo "Copied jeprof to current directory and made it executable."
+else
+  echo "jeprof not found"
+  exit 1
+fi
--- a/docs/how-to/memory-profile-scripts/scripts/gen_flamegraph.sh
+++ b/docs/how-to/memory-profile-scripts/scripts/gen_flamegraph.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+# Generate flame graphs from a series of `.gprof` files
+# First argument: Path to the binary executable
+# Second argument: Path to directory containing gprof files
+# Requires `jeprof` in current directory; `flamegraph.pl` is optional (without it only `.collapse` files are produced)
+# What this script essentially does is:
+# ./jeprof <binary> <gprof> --collapse | ./flamegraph.pl > <output>
+# For differential analysis between consecutive profiles:
+# ./jeprof <binary> --base <gprof1> <gprof2> --collapse | ./flamegraph.pl > <output_diff>
+
+set -e # Exit immediately if a command exits with a non-zero status.
+
+# Check for tools: jeprof is required, flamegraph.pl is optional
+if [ ! -f "./jeprof" ]; then
+    echo "Error: jeprof not found in the current directory."
+    exit 1
+fi
+
+if [ ! -f "./flamegraph.pl" ]; then
+    echo "Warning: flamegraph.pl not found in the current directory; only .collapse files will be generated."
+fi
+
+# Check arguments
+if [ "$#" -ne 2 ]; then
+    echo "Usage: $0 <binary_path> <gprof_directory>"
+    exit 1
+fi
+
+BINARY_PATH=$1
+GPROF_DIR=$2
+OUTPUT_DIR="${GPROF_DIR}/flamegraphs" # Store outputs in a subdirectory
+
+if [ ! -f "$BINARY_PATH" ]; then
+    echo "Error: Binary file not found at $BINARY_PATH"
+    exit 1
+fi
+
+if [ ! -d "$GPROF_DIR" ]; then
+    echo "Error: gprof directory not found at $GPROF_DIR"
+    exit 1
+fi
+
+mkdir -p "$OUTPUT_DIR"
+echo "Generating flamegraphs in $OUTPUT_DIR"
+
+# Find and sort gprof files (sort -V gives a natural ordering of the numbers in the file names)
+# Use null-terminated strings so that unusual file names survive the find/sort pipeline
+mapfile -d $'\0' gprof_files < <(find "$GPROF_DIR" -maxdepth 1 -name '*.gprof' -print0 | sort -zV)
+
+if [ ${#gprof_files[@]} -eq 0 ]; then
+    echo "No .gprof files found in $GPROF_DIR"
+    exit 0
+fi
+
+prev_gprof=""
+
+# Generate flamegraphs
+for gprof_file in "${gprof_files[@]}"; do
+    # Skip empty entries if any
+    [ -n "$gprof_file" ] || continue
+
+    filename=$(basename "$gprof_file" .gprof)
+    output_collapse="${OUTPUT_DIR}/${filename}.collapse"
+    output_svg="${OUTPUT_DIR}/${filename}.svg"
+    echo "Generating collapse file for $gprof_file -> $output_collapse"
+    ./jeprof "$BINARY_PATH" "$gprof_file" --collapse > "$output_collapse"
+    if [ -f "./flamegraph.pl" ]; then
+        echo "Generating flamegraph for $gprof_file -> $output_svg"
+        ./flamegraph.pl "$output_collapse" > "$output_svg" || true
+    fi
+
+    # Generate diff flamegraph if not the first file
+    if [ -n "$prev_gprof" ]; then
+        prev_filename=$(basename "$prev_gprof" .gprof)
+        diff_output_collapse="${OUTPUT_DIR}/${prev_filename}_vs_${filename}_diff.collapse"
+        diff_output_svg="${OUTPUT_DIR}/${prev_filename}_vs_${filename}_diff.svg"
+        echo "Generating diff collapse file for $prev_gprof vs $gprof_file -> $diff_output_collapse"
+        ./jeprof "$BINARY_PATH" --base "$prev_gprof" "$gprof_file" --collapse > "$diff_output_collapse"
+        if [ -f "./flamegraph.pl" ]; then
+            echo "Generating diff flamegraph for $prev_gprof vs $gprof_file -> $diff_output_svg"
+            ./flamegraph.pl "$diff_output_collapse" > "$diff_output_svg" || true
+        fi
+    fi
+
+    prev_gprof="$gprof_file"
+done
+
+echo "Flamegraph generation complete."
--- a/docs/how-to/memory-profile-scripts/scripts/gen_from_collapse.sh
+++ b/docs/how-to/memory-profile-scripts/scripts/gen_from_collapse.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Render an SVG flame graph for every `.collapse` file in a directory
+# Usage: gen_from_collapse.sh <collapse_directory>
+# `flamegraph.pl` must be present in the current directory
+
+# Bail out early when the rendering tool is missing
+if ! [ -f "./flamegraph.pl" ]; then
+    echo "Error: flamegraph.pl not found in the current directory."
+    exit 1
+fi
+
+# A directory argument is mandatory
+if [ -z "$1" ]; then
+    echo "Usage: $0 <collapse_directory>"
+    exit 1
+fi
+
+COLLAPSE_DIR=$1
+
+# The argument must name an existing directory
+if ! [ -d "$COLLAPSE_DIR" ]; then
+    echo "Error: '$COLLAPSE_DIR' is not a valid directory."
+    exit 1
+fi
+
+echo "Generating flame graphs from collapse files in '$COLLAPSE_DIR'..."
+
+# Walk the top level of the directory; entries are NUL-delimited so unusual file names survive
+find "$COLLAPSE_DIR" -maxdepth 1 -name "*.collapse" -print0 | while IFS= read -r -d $'\0' collapse_file; do
+    # Only regular files are rendered
+    [ -f "$collapse_file" ] || continue
+
+    # Output path: same name with the .collapse suffix swapped for .svg
+    svg_file="${collapse_file%.collapse}.svg"
+    echo "Generating $svg_file from $collapse_file..."
+    if ./flamegraph.pl "$collapse_file" > "$svg_file"; then
+        echo "Successfully generated $svg_file"
+    else
+        echo "Error generating flame graph for $collapse_file"
+    fi
+done
+
+echo "Flame graph generation complete."
--- a/docs/how-to/memory-profile-scripts/scripts/get_flamegraph_tool.sh
+++ b/docs/how-to/memory-profile-scripts/scripts/get_flamegraph_tool.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+# Download flamegraph.pl to current directory - this is the flame graph generation tool script
+# -f makes curl fail on HTTP errors instead of saving the error page as the script; -L follows redirects
+curl -fL https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl -o ./flamegraph.pl || exit 1
+chmod +x ./flamegraph.pl
--- a/flake.lock
+++ b/flake.lock
@@ -8,11 +8,11 @@
        "rust-analyzer-src": "rust-analyzer-src"
      },
      "locked": {
-        "lastModified": 1737613896,
-        "narHash": "sha256-ldqXIglq74C7yKMFUzrS9xMT/EVs26vZpOD68Sh7OcU=",
+        "lastModified": 1742452566,
+        "narHash": "sha256-sVuLDQ2UIWfXUBbctzrZrXM2X05YjX08K7XHMztt36E=",
        "owner": "nix-community",
        "repo": "fenix",
-        "rev": "303a062fdd8e89f233db05868468975d17855d80",
+        "rev": "7d9ba794daf5e8cc7ee728859bc688d8e26d5f06",
        "type": "github"
      },
      "original": {
@@ -41,11 +41,11 @@
    },
    "nixpkgs": {
      "locked": {
-        "lastModified": 1737569578,
-        "narHash": "sha256-6qY0pk2QmUtBT9Mywdvif0i/CLVgpCjMUn6g9vB+f3M=",
+        "lastModified": 1743576891,
+        "narHash": "sha256-vXiKURtntURybE6FMNFAVpRPr8+e8KoLPrYs9TGuAKc=",
        "owner": "NixOS",
        "repo": "nixpkgs",
-        "rev": "47addd76727f42d351590c905d9d1905ca895b82",
+        "rev": "44a69ed688786e98a101f02b712c313f1ade37ab",
        "type": "github"
      },
      "original": {
@@ -65,11 +65,11 @@
    "rust-analyzer-src": {
      "flake": false,
      "locked": {
-        "lastModified": 1737581772,
-        "narHash": "sha256-t1P2Pe3FAX9TlJsCZbmJ3wn+C4qr6aSMypAOu8WNsN0=",
+        "lastModified": 1742296961,
+        "narHash": "sha256-gCpvEQOrugHWLimD1wTFOJHagnSEP6VYBDspq96Idu0=",
        "owner": "rust-lang",
        "repo": "rust-analyzer",
-        "rev": "582af7ee9c8d84f5d534272fc7de9f292bd849be",
+        "rev": "15d87419f1a123d8f888d608129c3ce3ff8f13d4",
        "type": "github"
      },
      "original": {
--- a/flake.nix
+++ b/flake.nix
@@ -21,7 +21,7 @@
        lib = nixpkgs.lib;
        rustToolchain = fenix.packages.${system}.fromToolchainName {
          name = (lib.importTOML ./rust-toolchain.toml).toolchain.channel;
-          sha256 = "sha256-f/CVA1EC61EWbh0SjaRNhLL0Ypx2ObupbzigZp8NmL4=";
+          sha256 = "sha256-i0Sh/ZFFsHlZ3oFZFc24qdk6Cd8Do8OPU4HJQsrKOeM=";
        };
      in
      {
--- a/grafana/README.md
+++ b/grafana/README.md
@@ -1,61 +1,89 @@
-Grafana dashboard for GreptimeDB
--------------------------------
+# Grafana dashboards for GreptimeDB

-GreptimeDB's official Grafana dashboard.
+## Overview

-Status notify: we are still working on this config. It's expected to change frequently in the recent days. Please feel free to submit your feedback and/or contribution to this dashboard 🤗
+This repository maintains the Grafana dashboards for GreptimeDB. It has two types of dashboards:

-If you use Helm [chart](https://github.com/GreptimeTeam/helm-charts) to deploy GreptimeDB cluster, you can enable self-monitoring by setting the following values in your Helm chart:
+- `cluster/dashboard.json`: The Grafana dashboard for the GreptimeDB cluster. Read the [dashboard.md](./dashboards/cluster/dashboard.md) for more details.
+- `standalone/dashboard.json`: The Grafana dashboard for the standalone GreptimeDB instance. **It's generated from the `cluster/dashboard.json` by removing the instance filter through the `make dashboards` command**. Read the [dashboard.md](./dashboards/standalone/dashboard.md) for more details.
+
+As GreptimeDB is under rapid development, the metrics may change. Please feel free to submit your feedback and/or contributions to these dashboards 🤗
+
+**NOTE**: 
+
+- The Grafana version should be greater than 9.0.
+
+- If you want to modify the dashboards, you only need to modify the `cluster/dashboard.json` and run the `make dashboards` command to generate the `standalone/dashboard.json` and other related files.
+
+To maintain the dashboards easily, we use the [`dac`](https://github.com/zyy17/dac) tool to generate the intermediate dashboards and markdown documents:
+
+- `cluster/dashboard.yaml`: The intermediate dashboard for the GreptimeDB cluster.
+- `standalone/dashboard.yaml`: The intermediate dashboard for the standalone GreptimeDB instance.
+
+## Data Sources
+
+There are two data sources for the dashboards to fetch the metrics:
+
+- **Prometheus**: Expose the metrics of GreptimeDB.
+- **Information Schema**: It is the MySQL port of the current monitored instance. The `overview` dashboard will use this datasource to show the information schema of the current instance.
+
+## Instance Filters
+
+To deploy the dashboards for multiple scenarios (K8s, bare metal, etc.), we prefer to use the `instance` label when filtering instances.
+
+Additionally, we recommend including the `pod` label in the legend to make it easier to identify each instance, even though this field will be empty in bare metal scenarios.
+
+For example, the following query is recommended:
+
+```promql
+sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)
+```
+
+And the legend will be like: `[{{instance}}]-[{{ pod }}]`.
+
+## Deployment
+
+### Helm
+
+If you use the Helm [chart](https://github.com/GreptimeTeam/helm-charts) to deploy a GreptimeDB cluster, you can enable self-monitoring by setting the following values in your Helm chart:

 - `monitoring.enabled=true`: Deploys a standalone GreptimeDB instance dedicated to monitoring the cluster;
 - `grafana.enabled=true`: Deploys Grafana and automatically imports the monitoring dashboard;

-The standalone GreptimeDB instance will collect metrics from your cluster and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/nightly/user-guide/deployments/deploy-on-kubernetes/getting-started).
+The standalone GreptimeDB instance will collect metrics from your cluster, and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/nightly/user-guide/deployments/deploy-on-kubernetes/getting-started).

-# How to use
+### Self-host Prometheus and import dashboards manually

-## `greptimedb.json`
+1. **Configure Prometheus to scrape the cluster**

-Open Grafana Dashboard page, choose `New` -> `Import`. And upload `greptimedb.json` file.
+   The following is an example configuration (**please modify it according to your actual situation**):

-## `greptimedb-cluster.json`
+    ```yml
+    # example config
+    # only to indicate how to assign labels to each target
+    # modify yours accordingly
+    scrape_configs:
+      - job_name: metasrv
+        static_configs:
+        - targets: ['<metasrv-ip>:<port>']

-This cluster dashboard provides a comprehensive view of incoming requests, response statuses, and internal activities such as flush and compaction, with a layered structure from frontend to datanode. Designed with a focus on alert functionality, its primary aim is to highlight any anomalies in metrics, allowing users to quickly pinpoint the cause of errors.
+      - job_name: datanode
+        static_configs:
+        - targets: ['<datanode0-ip>:<port>', '<datanode1-ip>:<port>', '<datanode2-ip>:<port>']

-We use Prometheus to scrape off metrics from nodes in GreptimeDB cluster, Grafana to visualize the diagram. Any compatible stack should work too.
+      - job_name: frontend
+        static_configs:
+        - targets: ['<frontend-ip>:<port>']
+    ```

-__Note__: This dashboard is still in an early stage of development. Any issue or advice on improvement is welcomed.
+2. **Configure the data sources in Grafana**

-### Configuration
+   You need to add two data sources in Grafana:

-Please ensure the following configuration before importing the dashboard into Grafana.
+   - Prometheus: It is the Prometheus instance that scrapes the GreptimeDB metrics.
+   - Information Schema: It is the MySQL port of the current monitored instance. The dashboard will use this datasource to show the information schema of the current instance.

-__1. Prometheus scrape config__
+3. **Import the dashboards based on your deployment scenario**

-Configure Prometheus to scrape the cluster.
-
-```yml
-# example config
-# only to indicate how to assign labels to each target
-# modify yours accordingly
-scrape_configs:
-  - job_name: metasrv
-    static_configs:
-    - targets: ['<metasrv-ip>:<port>']
-
-  - job_name: datanode
-    static_configs:
-    - targets: ['<datanode0-ip>:<port>', '<datanode1-ip>:<port>', '<datanode2-ip>:<port>']
-
-  - job_name: frontend
-    static_configs:
-    - targets: ['<frontend-ip>:<port>']
-```
-
-__2. Grafana config__
-
-Create a Prometheus data source in Grafana before using this dashboard. We use `datasource` as a variable in Grafana dashboard so that multiple environments are supported.
-
-### Usage
-
-Use `datasource` or `instance` on the upper-left corner to filter data from certain node.
+   - **Cluster**: Import the `cluster/dashboard.json` dashboard.
+   - **Standalone**: Import the `standalone/dashboard.json` dashboard.
--- a/grafana/check.sh
+++ b/grafana/check.sh
@@ -1,19 +0,0 @@
-#!/usr/bin/env bash
-
-BASEDIR=$(dirname "$0")
-
-# Use jq to check for panels with empty or missing descriptions
-invalid_panels=$(cat $BASEDIR/greptimedb-cluster.json | jq -r '
-  .panels[]
-  | select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))
-')
-
-# Check if any invalid panels were found
-if [[ -n "$invalid_panels" ]]; then
-  echo "Error: The following panels have empty or missing descriptions:"
-  echo "$invalid_panels"
-  exit 1
-else
-  echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
-  exit 0
-fi
--- a/grafana/dashboards/cluster/dashboard.json
+++ b/grafana/dashboards/cluster/dashboard.json
--- a/grafana/dashboards/cluster/dashboard.md
+++ b/grafana/dashboards/cluster/dashboard.md
@@ -0,0 +1,97 @@
+# Overview
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Uptime | `time() - process_start_time_seconds` | `stat` | The start time of GreptimeDB. | `prometheus` | `s` | `__auto` |
+| Version | `SELECT pkg_version FROM information_schema.build_info` | `stat` | GreptimeDB version. | `mysql` | -- | -- |
+| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))` | `stat` | Total ingestion rate. | `prometheus` | `rowsps` | `__auto` |
+| Total Storage Size | `select SUM(disk_size) from information_schema.region_statistics;` | `stat` | Total number of data file size. | `mysql` | `decbytes` | -- |
+| Total Rows | `select SUM(region_rows) from information_schema.region_statistics;` | `stat` | Total number of data rows in the cluster. Calculated by sum of rows from each region. | `mysql` | `sishort` | -- |
+| Deployment | `SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';`<br/>`SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';`<br/>`SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';`<br/>`SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';` | `stat` | The deployment topology of GreptimeDB. | `mysql` | -- | -- |
+| Database Resources | `SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')`<br/>`SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'`<br/>`SELECT COUNT(region_id) as regions FROM information_schema.region_peers`<br/>`SELECT COUNT(*) as flows FROM information_schema.flows` | `stat` | The number of the key resources in GreptimeDB. | `mysql` | -- | -- |
+| Data Size | `SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;`<br/>`SELECT SUM(index_size) as index FROM information_schema.region_statistics;`<br/>`SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;` | `stat` | The data size of wal/index/manifest in the GreptimeDB. | `mysql` | `decbytes` | -- |
+# Ingestion
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `ingestion` |
+| Ingestion Rate by Type | `sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))`<br/>`sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `http-logs` |
+# Queries
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Total Query Rate | `sum (rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_http_promql_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Total rate of query API calls by protocol. This metric is collected from frontends.<br/><br/>Here we listed 3 main protocols:<br/>- MySQL<br/>- Postgres<br/>- Prometheus API<br/><br/>Note that there are some other minor query APIs like /sql are not included | `prometheus` | `reqps` | `mysql` |
+# Resources
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Datanode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{instance}}]-[{{ pod }}]` |
+| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
+| Frontend Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
+| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
+| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
+| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
+| Flownode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
+| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
+# Frontend Requests
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| HTTP QPS per Instance | `sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{instance=~"$frontend",path!~"/health\|/metrics"}[$__rate_interval]))` | `timeseries` | HTTP QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]` |
+| HTTP P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{instance=~"$frontend",path!~"/health\|/metrics"}[$__rate_interval])))` | `timeseries` | HTTP P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
+| gRPC QPS per Instance | `sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | gRPC QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]` |
+| gRPC P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | gRPC P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
+| MySQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | MySQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
+| MySQL P99 per Instance | `histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | MySQL P99 per Instance. | `prometheus` | `s` | `[{{ instance }}]-[{{ pod }}]-p99` |
+| PostgreSQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | PostgreSQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
+| PostgreSQL P99 per Instance | `histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | PostgreSQL P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
+# Frontend to Datanode
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Ingest Rows per Instance | `sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Ingestion rate by row as in each frontend | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
+| Region Call QPS per Instance | `sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{instance=~"$frontend"}[$__rate_interval]))` | `timeseries` | Region Call QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
+| Region Call P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{instance=~"$frontend"}[$__rate_interval])))` | `timeseries` | Region Call P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
+# Mito Engine
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Request OPS per Instance | `sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Request QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
+| Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
+| Write Buffer per Instance | `greptime_mito_write_buffer_bytes{instance=~"$datanode"}` | `timeseries` | Write Buffer per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]` |
+| Write Rows per Instance | `sum by (instance, pod) (rate(greptime_mito_write_rows_total{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Ingestion size by row counts. | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
+| Flush OPS per Instance | `sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Flush QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{reason}}]` |
+| Write Stall per Instance | `sum by(instance, pod) (greptime_mito_write_stall_total{instance=~"$datanode"})` | `timeseries` | Write Stall per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
+| Read Stage OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{instance=~"$datanode", stage="total"}[$__rate_interval]))` | `timeseries` | Read Stage OPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]` |
+| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
+| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
+| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
+| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
+| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
+| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
+| Cached Bytes per Instance | `greptime_mito_cache_bytes{instance=~"$datanode"}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
+| Inflight Compaction | `greptime_mito_inflight_compaction_count` | `timeseries` | Ongoing compaction task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
+| WAL sync duration seconds | `histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))` | `timeseries` | Raft engine (local disk) log store sync latency, p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
+| Log Store op duration seconds | `histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))` | `timeseries` | Write-ahead log operations latency at p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99` |
+| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
+# OpenDAL
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
+| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
+| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
+| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
+| List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
+| List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
+| Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode",operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode", error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
+# Metasrv
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `state-timeline` | Counter of region migration by source and destination | `prometheus` | `none` | `from-datanode-{{datanode_id}}` |
+| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `prometheus` | `none` | `__auto` |
+| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `__auto` |
+# Flownode
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | `prometheus` | -- | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
+| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-p95` |
+| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
+| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
+| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |
--- a/grafana/dashboards/cluster/dashboard.yaml
+++ b/grafana/dashboards/cluster/dashboard.yaml
@@ -0,0 +1,769 @@
+groups:
+    - title: Overview
+      panels:
+        - title: Uptime
+          type: stat
+          description: The time elapsed since GreptimeDB started.
+          unit: s
+          queries:
+            - expr: time() - process_start_time_seconds
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: __auto
+        - title: Version
+          type: stat
+          description: GreptimeDB version.
+          queries:
+            - expr: SELECT pkg_version FROM information_schema.build_info
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+        - title: Total Ingestion Rate
+          type: stat
+          description: Total ingestion rate.
+          unit: rowsps
+          queries:
+            - expr: sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: __auto
+        - title: Total Storage Size
+          type: stat
+          description: Total size of data files.
+          unit: decbytes
+          queries:
+            - expr: select SUM(disk_size) from information_schema.region_statistics;
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+        - title: Total Rows
+          type: stat
+          description: Total number of data rows in the cluster. Calculated by sum of rows from each region.
+          unit: sishort
+          queries:
+            - expr: select SUM(region_rows) from information_schema.region_statistics;
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+        - title: Deployment
+          type: stat
+          description: The deployment topology of GreptimeDB.
+          queries:
+            - expr: SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+        - title: Database Resources
+          type: stat
+          description: The number of the key resources in GreptimeDB.
+          queries:
+            - expr: SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT COUNT(region_id) as regions FROM information_schema.region_peers
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT COUNT(*) as flows FROM information_schema.flows
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+        - title: Data Size
+          type: stat
+          description: The data size of wal/index/manifest in the GreptimeDB.
+          unit: decbytes
+          queries:
+            - expr: SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT SUM(index_size) as index FROM information_schema.region_statistics;
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+    - title: Ingestion
+      panels:
+        - title: Total Ingestion Rate
+          type: timeseries
+          description: |
+            Total ingestion rate.
+
+            Here we listed 3 primary protocols:
+
+            - Prometheus remote write
+            - Greptime's gRPC API (when using our ingest SDK)
+            - Log ingestion http API
+          unit: rowsps
+          queries:
+            - expr: sum(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: ingestion
+        - title: Ingestion Rate by Type
+          type: timeseries
+          description: |
+            Total ingestion rate.
+
+            Here we listed 3 primary protocols:
+
+            - Prometheus remote write
+            - Greptime's gRPC API (when using our ingest SDK)
+            - Log ingestion http API
+          unit: rowsps
+          queries:
+            - expr: sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: http-logs
+            - expr: sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: prometheus-remote-write
+    - title: Queries
+      panels:
+        - title: Total Query Rate
+          type: timeseries
+          description: |-
+            Total rate of query API calls by protocol. This metric is collected from frontends.
+
+            Here we listed 3 main protocols:
+            - MySQL
+            - Postgres
+            - Prometheus API
+
+            Note that some other minor query APIs, like /sql, are not included.
+          unit: reqps
+          queries:
+            - expr: sum (rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: mysql
+            - expr: sum (rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: pg
+            - expr: sum (rate(greptime_servers_http_promql_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: promql
+    - title: Resources
+      panels:
+        - title: Datanode Memory per Instance
+          type: timeseries
+          description: Current memory usage by instance
+          unit: decbytes
+          queries:
+            - expr: sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{ pod }}]'
+        - title: Datanode CPU Usage per Instance
+          type: timeseries
+          description: Current cpu usage by instance
+          unit: none
+          queries:
+            - expr: sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+        - title: Frontend Memory per Instance
+          type: timeseries
+          description: Current memory usage by instance
+          unit: decbytes
+          queries:
+            - expr: sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+        - title: Frontend CPU Usage per Instance
+          type: timeseries
+          description: Current cpu usage by instance
+          unit: none
+          queries:
+            - expr: sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
+        - title: Metasrv Memory per Instance
+          type: timeseries
+          description: Current memory usage by instance
+          unit: decbytes
+          queries:
+            - expr: sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
+        - title: Metasrv CPU Usage per Instance
+          type: timeseries
+          description: Current cpu usage by instance
+          unit: none
+          queries:
+            - expr: sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+        - title: Flownode Memory per Instance
+          type: timeseries
+          description: Current memory usage by instance
+          unit: decbytes
+          queries:
+            - expr: sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+        - title: Flownode CPU Usage per Instance
+          type: timeseries
+          description: Current cpu usage by instance
+          unit: none
+          queries:
+            - expr: sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+    - title: Frontend Requests
+      panels:
+        - title: HTTP QPS per Instance
+          type: timeseries
+          description: HTTP QPS per Instance.
+          unit: reqps
+          queries:
+            - expr: sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{instance=~"$frontend",path!~"/health|/metrics"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]'
+        - title: HTTP P99 per Instance
+          type: timeseries
+          description: HTTP P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{instance=~"$frontend",path!~"/health|/metrics"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
+        - title: gRPC QPS per Instance
+          type: timeseries
+          description: gRPC QPS per Instance.
+          unit: reqps
+          queries:
+            - expr: sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]'
+        - title: gRPC P99 per Instance
+          type: timeseries
+          description: gRPC P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]-p99'
+        - title: MySQL QPS per Instance
+          type: timeseries
+          description: MySQL QPS per Instance.
+          unit: reqps
+          queries:
+            - expr: sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: MySQL P99 per Instance
+          type: timeseries
+          description: MySQL P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]-p99'
+        - title: PostgreSQL QPS per Instance
+          type: timeseries
+          description: PostgreSQL QPS per Instance.
+          unit: reqps
+          queries:
+            - expr: sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{instance=~"$frontend"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: PostgreSQL P99 per Instance
+          type: timeseries
+          description: PostgreSQL P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{instance=~"$frontend"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-p99'
+    - title: Frontend to Datanode
+      panels:
+        - title: Ingest Rows per Instance
+          type: timeseries
+          description: Ingestion rate by row as in each frontend
+          unit: rowsps
+          queries:
+            - expr: sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{instance=~"$frontend"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: Region Call QPS per Instance
+          type: timeseries
+          description: Region Call QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{instance=~"$frontend"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
+        - title: Region Call P99 per Instance
+          type: timeseries
+          description: Region Call P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{instance=~"$frontend"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
+    - title: Mito Engine
+      panels:
+        - title: Request OPS per Instance
+          type: timeseries
+          description: Request QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{instance=~"$datanode"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
+        - title: Request P99 per Instance
+          type: timeseries
+          description: Request P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
+        - title: Write Buffer per Instance
+          type: timeseries
+          description: Write Buffer per Instance.
+          unit: decbytes
+          queries:
+            - expr: greptime_mito_write_buffer_bytes{instance=~"$datanode"}
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: Write Rows per Instance
+          type: timeseries
+          description: Ingestion size by row counts.
+          unit: rowsps
+          queries:
+            - expr: sum by (instance, pod) (rate(greptime_mito_write_rows_total{instance=~"$datanode"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: Flush OPS per Instance
+          type: timeseries
+          description: Flush QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{instance=~"$datanode"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{reason}}]'
+        - title: Write Stall per Instance
+          type: timeseries
+          description: Write Stall per Instance.
+          queries:
+            - expr: sum by(instance, pod) (greptime_mito_write_stall_total{instance=~"$datanode"})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: Read Stage OPS per Instance
+          type: timeseries
+          description: Read Stage OPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{instance=~"$datanode", stage="total"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: Read Stage P99 per Instance
+          type: timeseries
+          description: Read Stage P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
+        - title: Write Stage P99 per Instance
+          type: timeseries
+          description: Write Stage P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
+        - title: Compaction OPS per Instance
+          type: timeseries
+          description: Compaction OPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{pod}}]'
+        - title: Compaction P99 per Instance by Stage
+          type: timeseries
+          description: Compaction latency by stage
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
+        - title: Compaction P99 per Instance
+          type: timeseries
+          description: Compaction P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction'
+        - title: WAL write size
+          type: timeseries
+          description: Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate.
+          unit: bytes
+          queries:
+            - expr: histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p95'
+            - expr: histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p99'
+            - expr: sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-throughput'
+        - title: Cached Bytes per Instance
+          type: timeseries
+          description: Cached Bytes per Instance.
+          unit: decbytes
+          queries:
+            - expr: greptime_mito_cache_bytes{instance=~"$datanode"}
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
+        - title: Inflight Compaction
+          type: timeseries
+          description: Ongoing compaction task count
+          unit: none
+          queries:
+            - expr: greptime_mito_inflight_compaction_count
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: WAL sync duration seconds
+          type: timeseries
+          description: Raft engine (local disk) log store sync latency, p99
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-p99'
+        - title: Log Store op duration seconds
+          type: timeseries
+          description: Write-ahead log operations latency at p99
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99'
+        - title: Inflight Flush
+          type: timeseries
+          description: Ongoing flush task count
+          unit: none
+          queries:
+            - expr: greptime_mito_inflight_flush_count
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+    - title: OpenDAL
+      panels:
+        - title: QPS per Instance
+          type: timeseries
+          description: QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
+        - title: Read QPS per Instance
+          type: timeseries
+          description: Read QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
+        - title: Read P99 per Instance
+          type: timeseries
+          description: Read P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
+        - title: Write QPS per Instance
+          type: timeseries
+          description: Write QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
+        - title: Write P99 per Instance
+          type: timeseries
+          description: Write P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
+        - title: List QPS per Instance
+          type: timeseries
+          description: List QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="list"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
+        - title: List P99 per Instance
+          type: timeseries
+          description: List P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="list"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
+        - title: Other Requests per Instance
+          type: timeseries
+          description: Other Requests per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode",operation!~"read|write|list|stat"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
+        - title: Other Request P99 per Instance
+          type: timeseries
+          description: Other Request P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read|write|list"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
+        - title: Opendal traffic
+          type: timeseries
+          description: Total traffic in bytes by instance and operation
+          unit: decbytes
+          queries:
+            - expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
+        - title: OpenDAL errors per Instance
+          type: timeseries
+          description: OpenDAL error counts per Instance.
+          queries:
+            - expr: sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode", error!="NotFound"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
+    - title: Metasrv
+      panels:
+        - title: Region migration datanode
+          type: state-timeline
+          description: Counter of region migration by source and destination
+          unit: none
+          queries:
+            - expr: greptime_meta_region_migration_stat{datanode_type="src"}
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: from-datanode-{{datanode_id}}
+            - expr: greptime_meta_region_migration_stat{datanode_type="desc"}
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: to-datanode-{{datanode_id}}
+        - title: Region migration error
+          type: timeseries
+          description: Counter of region migration error
+          unit: none
+          queries:
+            - expr: greptime_meta_region_migration_error
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: __auto
+        - title: Datanode load
+          type: timeseries
+          description: Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads.
+          unit: none
+          queries:
+            - expr: greptime_datanode_load
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: __auto
+    - title: Flownode
+      panels:
+        - title: Flow Ingest / Output Rate
+          type: timeseries
+          description: Flow Ingest / Output Rate.
+          queries:
+            - expr: sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{pod}}]-[{{instance}}]-[{{direction}}]'
+        - title: Flow Ingest Latency
+          type: timeseries
+          description: Flow Ingest Latency.
+          queries:
+            - expr: histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-p95'
+            - expr: histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-p99'
+        - title: Flow Operation Latency
+          type: timeseries
+          description: Flow Operation Latency.
+          queries:
+            - expr: histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p95'
+            - expr: histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p99'
+        - title: Flow Buffer Size per Instance
+          type: timeseries
+          description: Flow Buffer Size per Instance.
+          queries:
+            - expr: greptime_flow_input_buf_size
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: Flow Processing Error per Instance
+          type: timeseries
+          description: Flow Processing Error per Instance.
+          queries:
+            - expr: sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'
--- a/grafana/dashboards/standalone/dashboard.json
+++ b/grafana/dashboards/standalone/dashboard.json
--- a/grafana/dashboards/standalone/dashboard.md
+++ b/grafana/dashboards/standalone/dashboard.md
@@ -0,0 +1,97 @@
+# Overview
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Uptime | `time() - process_start_time_seconds` | `stat` | The time elapsed since GreptimeDB started. | `prometheus` | `s` | `__auto` |
+| Version | `SELECT pkg_version FROM information_schema.build_info` | `stat` | GreptimeDB version. | `mysql` | -- | -- |
+| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))` | `stat` | Total ingestion rate. | `prometheus` | `rowsps` | `__auto` |
+| Total Storage Size | `select SUM(disk_size) from information_schema.region_statistics;` | `stat` | Total size of data files. | `mysql` | `decbytes` | -- |
+| Total Rows | `select SUM(region_rows) from information_schema.region_statistics;` | `stat` | Total number of data rows in the cluster. Calculated by sum of rows from each region. | `mysql` | `sishort` | -- |
+| Deployment | `SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';`<br/>`SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';`<br/>`SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';`<br/>`SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';` | `stat` | The deployment topology of GreptimeDB. | `mysql` | -- | -- |
+| Database Resources | `SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')`<br/>`SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'`<br/>`SELECT COUNT(region_id) as regions FROM information_schema.region_peers`<br/>`SELECT COUNT(*) as flows FROM information_schema.flows` | `stat` | The number of the key resources in GreptimeDB. | `mysql` | -- | -- |
+| Data Size | `SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;`<br/>`SELECT SUM(index_size) as index FROM information_schema.region_statistics;`<br/>`SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;` | `stat` | The data size of wal/index/manifest in the GreptimeDB. | `mysql` | `decbytes` | -- |
+# Ingestion
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Total Ingestion Rate | `sum(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `ingestion` |
+| Ingestion Rate by Type | `sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))`<br/>`sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))` | `timeseries` | Total ingestion rate.<br/><br/>Here we listed 3 primary protocols:<br/><br/>- Prometheus remote write<br/>- Greptime's gRPC API (when using our ingest SDK)<br/>- Log ingestion http API<br/> | `prometheus` | `rowsps` | `http-logs` |
+# Queries
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Total Query Rate | `sum (rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))`<br/>`sum (rate(greptime_servers_http_promql_elapsed_count{}[$__rate_interval]))` | `timeseries` | Total rate of query API calls by protocol. This metric is collected from frontends.<br/><br/>Here we list 3 main protocols:<br/>- MySQL<br/>- Postgres<br/>- Prometheus API<br/><br/>Note that some other minor query APIs, like /sql, are not included. | `prometheus` | `reqps` | `mysql` |
+# Resources
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Datanode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{instance}}]-[{{ pod }}]` |
+| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
+| Frontend Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
+| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
+| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
+| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
+| Flownode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
+| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
+# Frontend Requests
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| HTTP QPS per Instance | `sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{path!~"/health\|/metrics"}[$__rate_interval]))` | `timeseries` | HTTP QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]` |
+| HTTP P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{path!~"/health\|/metrics"}[$__rate_interval])))` | `timeseries` | HTTP P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99` |
+| gRPC QPS per Instance | `sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{}[$__rate_interval]))` | `timeseries` | gRPC QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]` |
+| gRPC P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | gRPC P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]-p99` |
+| MySQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))` | `timeseries` | MySQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
+| MySQL P99 per Instance | `histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | MySQL P99 per Instance. | `prometheus` | `s` | `[{{ instance }}]-[{{ pod }}]-p99` |
+| PostgreSQL QPS per Instance | `sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))` | `timeseries` | PostgreSQL QPS per Instance. | `prometheus` | `reqps` | `[{{instance}}]-[{{pod}}]` |
+| PostgreSQL P99 per Instance | `histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | PostgreSQL P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
+# Frontend to Datanode
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Ingest Rows per Instance | `sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))` | `timeseries` | Ingestion rate by row as in each frontend | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
+| Region Call QPS per Instance | `sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{}[$__rate_interval]))` | `timeseries` | Region Call QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
+| Region Call P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{}[$__rate_interval])))` | `timeseries` | Region Call P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{request_type}}]` |
+# Mito Engine
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Request OPS per Instance | `sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{}[$__rate_interval]))` | `timeseries` | Request QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
+| Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
+| Write Buffer per Instance | `greptime_mito_write_buffer_bytes{}` | `timeseries` | Write Buffer per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]` |
+| Write Rows per Instance | `sum by (instance, pod) (rate(greptime_mito_write_rows_total{}[$__rate_interval]))` | `timeseries` | Ingestion size by row counts. | `prometheus` | `rowsps` | `[{{instance}}]-[{{pod}}]` |
+| Flush OPS per Instance | `sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{}[$__rate_interval]))` | `timeseries` | Flush QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{reason}}]` |
+| Write Stall per Instance | `sum by(instance, pod) (greptime_mito_write_stall_total{})` | `timeseries` | Write Stall per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
+| Read Stage OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{ stage="total"}[$__rate_interval]))` | `timeseries` | Read Stage OPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]` |
+| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
+| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
+| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
+| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
+| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
+| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
+| Cached Bytes per Instance | `greptime_mito_cache_bytes{}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
+| Inflight Compaction | `greptime_mito_inflight_compaction_count` | `timeseries` | Ongoing compaction task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
+| WAL sync duration seconds | `histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))` | `timeseries` | Raft engine (local disk) log store sync latency, p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
+| Log Store op duration seconds | `histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))` | `timeseries` | Write-ahead log operations latency at p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99` |
+| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
+# OpenDAL
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
+| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
+| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
+| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
+| List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
+| List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
+| Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))` | `timeseries` | Total traffic in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{ error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
+# Metasrv
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `state-timeline` | Counter of region migration by source and destination | `prometheus` | `none` | `from-datanode-{{datanode_id}}` |
+| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `prometheus` | `none` | `__auto` |
+| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `__auto` |
+# Flownode
+| Title | Query | Type | Description | Datasource | Unit | Legend Format |
+| --- | --- | --- | --- | --- | --- | --- |
+| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | `prometheus` | -- | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
+| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-p95` |
+| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
+| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
+| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |
--- a/grafana/dashboards/standalone/dashboard.yaml
+++ b/grafana/dashboards/standalone/dashboard.yaml
@@ -0,0 +1,769 @@
+groups:
+    - title: Overview
+      panels:
+        - title: Uptime
+          type: stat
+          description: The start time of GreptimeDB.
+          unit: s
+          queries:
+            - expr: time() - process_start_time_seconds
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: __auto
+        - title: Version
+          type: stat
+          description: GreptimeDB version.
+          queries:
+            - expr: SELECT pkg_version FROM information_schema.build_info
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+        - title: Total Ingestion Rate
+          type: stat
+          description: Total ingestion rate.
+          unit: rowsps
+          queries:
+            - expr: sum(rate(greptime_table_operator_ingest_rows[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: __auto
+        - title: Total Storage Size
+          type: stat
+          description: Total number of data file size.
+          unit: decbytes
+          queries:
+            - expr: select SUM(disk_size) from information_schema.region_statistics;
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+        - title: Total Rows
+          type: stat
+          description: Total number of data rows in the cluster. Calculated by sum of rows from each region.
+          unit: sishort
+          queries:
+            - expr: select SUM(region_rows) from information_schema.region_statistics;
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+        - title: Deployment
+          type: stat
+          description: The deployment topology of GreptimeDB.
+          queries:
+            - expr: SELECT count(*) as datanode FROM information_schema.cluster_info WHERE peer_type = 'DATANODE';
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT count(*) as frontend FROM information_schema.cluster_info WHERE peer_type = 'FRONTEND';
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT count(*) as metasrv FROM information_schema.cluster_info WHERE peer_type = 'METASRV';
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT count(*) as flownode FROM information_schema.cluster_info WHERE peer_type = 'FLOWNODE';
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+        - title: Database Resources
+          type: stat
+          description: The number of the key resources in GreptimeDB.
+          queries:
+            - expr: SELECT COUNT(*) as databases FROM information_schema.schemata WHERE schema_name NOT IN ('greptime_private', 'information_schema')
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT COUNT(*) as tables FROM information_schema.tables WHERE table_schema != 'information_schema'
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT COUNT(region_id) as regions FROM information_schema.region_peers
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT COUNT(*) as flows FROM information_schema.flows
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+        - title: Data Size
+          type: stat
+          description: The data size of wal/index/manifest in the GreptimeDB.
+          unit: decbytes
+          queries:
+            - expr: SELECT SUM(memtable_size) * 0.42825 as WAL FROM information_schema.region_statistics;
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT SUM(index_size) as index FROM information_schema.region_statistics;
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+            - expr: SELECT SUM(manifest_size) as manifest FROM information_schema.region_statistics;
+              datasource:
+                type: mysql
+                uid: ${information_schema}
+    - title: Ingestion
+      panels:
+        - title: Total Ingestion Rate
+          type: timeseries
+          description: |
+            Total ingestion rate.
+
+            Here we listed 3 primary protocols:
+
+            - Prometheus remote write
+            - Greptime's gRPC API (when using our ingest SDK)
+            - Log ingestion http API
+          unit: rowsps
+          queries:
+            - expr: sum(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: ingestion
+        - title: Ingestion Rate by Type
+          type: timeseries
+          description: |
+            Total ingestion rate.
+
+            Here we listed 3 primary protocols:
+
+            - Prometheus remote write
+            - Greptime's gRPC API (when using our ingest SDK)
+            - Log ingestion http API
+          unit: rowsps
+          queries:
+            - expr: sum(rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: http-logs
+            - expr: sum(rate(greptime_servers_prometheus_remote_write_samples[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: prometheus-remote-write
+    - title: Queries
+      panels:
+        - title: Total Query Rate
+          type: timeseries
+          description: |-
+            Total rate of query API calls by protocol. This metric is collected from frontends.
+
+            Here we listed 3 main protocols:
+            - MySQL
+            - Postgres
+            - Prometheus API
+
+            Note that there are some other minor query APIs like /sql are not included
+          unit: reqps
+          queries:
+            - expr: sum (rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: mysql
+            - expr: sum (rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: pg
+            - expr: sum (rate(greptime_servers_http_promql_elapsed_count{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: promql
+    - title: Resources
+      panels:
+        - title: Datanode Memory per Instance
+          type: timeseries
+          description: Current memory usage by instance
+          unit: decbytes
+          queries:
+            - expr: sum(process_resident_memory_bytes{}) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{ pod }}]'
+        - title: Datanode CPU Usage per Instance
+          type: timeseries
+          description: Current cpu usage by instance
+          unit: none
+          queries:
+            - expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+        - title: Frontend Memory per Instance
+          type: timeseries
+          description: Current memory usage by instance
+          unit: decbytes
+          queries:
+            - expr: sum(process_resident_memory_bytes{}) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+        - title: Frontend CPU Usage per Instance
+          type: timeseries
+          description: Current cpu usage by instance
+          unit: none
+          queries:
+            - expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
+        - title: Metasrv Memory per Instance
+          type: timeseries
+          description: Current memory usage by instance
+          unit: decbytes
+          queries:
+            - expr: sum(process_resident_memory_bytes{}) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
+        - title: Metasrv CPU Usage per Instance
+          type: timeseries
+          description: Current cpu usage by instance
+          unit: none
+          queries:
+            - expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+        - title: Flownode Memory per Instance
+          type: timeseries
+          description: Current memory usage by instance
+          unit: decbytes
+          queries:
+            - expr: sum(process_resident_memory_bytes{}) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+        - title: Flownode CPU Usage per Instance
+          type: timeseries
+          description: Current cpu usage by instance
+          unit: none
+          queries:
+            - expr: sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]'
+    - title: Frontend Requests
+      panels:
+        - title: HTTP QPS per Instance
+          type: timeseries
+          description: HTTP QPS per Instance.
+          unit: reqps
+          queries:
+            - expr: sum by(instance, pod, path, method, code) (rate(greptime_servers_http_requests_elapsed_count{path!~"/health|/metrics"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]'
+        - title: HTTP P99 per Instance
+          type: timeseries
+          description: HTTP P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, path, method, code) (rate(greptime_servers_http_requests_elapsed_bucket{path!~"/health|/metrics"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{method}}]-[{{code}}]-p99'
+        - title: gRPC QPS per Instance
+          type: timeseries
+          description: gRPC QPS per Instance.
+          unit: reqps
+          queries:
+            - expr: sum by(instance, pod, path, code) (rate(greptime_servers_grpc_requests_elapsed_count{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]'
+        - title: gRPC P99 per Instance
+          type: timeseries
+          description: gRPC P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, path, code) (rate(greptime_servers_grpc_requests_elapsed_bucket{}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{path}}]-[{{code}}]-p99'
+        - title: MySQL QPS per Instance
+          type: timeseries
+          description: MySQL QPS per Instance.
+          unit: reqps
+          queries:
+            - expr: sum by(pod, instance)(rate(greptime_servers_mysql_query_elapsed_count{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: MySQL P99 per Instance
+          type: timeseries
+          description: MySQL P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(pod, instance, le) (rate(greptime_servers_mysql_query_elapsed_bucket{}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{ pod }}]-p99'
+        - title: PostgreSQL QPS per Instance
+          type: timeseries
+          description: PostgreSQL QPS per Instance.
+          unit: reqps
+          queries:
+            - expr: sum by(pod, instance)(rate(greptime_servers_postgres_query_elapsed_count{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: PostgreSQL P99 per Instance
+          type: timeseries
+          description: PostgreSQL P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(pod,instance,le) (rate(greptime_servers_postgres_query_elapsed_bucket{}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-p99'
+    - title: Frontend to Datanode
+      panels:
+        - title: Ingest Rows per Instance
+          type: timeseries
+          description: Ingestion rate by row as in each frontend
+          unit: rowsps
+          queries:
+            - expr: sum by(instance, pod)(rate(greptime_table_operator_ingest_rows{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: Region Call QPS per Instance
+          type: timeseries
+          description: Region Call QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, request_type) (rate(greptime_grpc_region_request_count{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
+        - title: Region Call P99 per Instance
+          type: timeseries
+          description: Region Call P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, request_type) (rate(greptime_grpc_region_request_bucket{}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{request_type}}]'
+    - title: Mito Engine
+      panels:
+        - title: Request OPS per Instance
+          type: timeseries
+          description: Request QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, type) (rate(greptime_mito_handle_request_elapsed_count{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
+        - title: Request P99 per Instance
+          type: timeseries
+          description: Request P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, type) (rate(greptime_mito_handle_request_elapsed_bucket{}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
+        - title: Write Buffer per Instance
+          type: timeseries
+          description: Write Buffer per Instance.
+          unit: decbytes
+          queries:
+            - expr: greptime_mito_write_buffer_bytes{}
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: Write Rows per Instance
+          type: timeseries
+          description: Ingestion size by row counts.
+          unit: rowsps
+          queries:
+            - expr: sum by (instance, pod) (rate(greptime_mito_write_rows_total{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: Flush OPS per Instance
+          type: timeseries
+          description: Flush QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, reason) (rate(greptime_mito_flush_requests_total{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{reason}}]'
+        - title: Write Stall per Instance
+          type: timeseries
+          description: Write Stall per Instance.
+          queries:
+            - expr: sum by(instance, pod) (greptime_mito_write_stall_total{})
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: Read Stage OPS per Instance
+          type: timeseries
+          description: Read Stage OPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod) (rate(greptime_mito_read_stage_elapsed_count{ stage="total"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: Read Stage P99 per Instance
+          type: timeseries
+          description: Read Stage P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
+        - title: Write Stage P99 per Instance
+          type: timeseries
+          description: Write Stage P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]'
+        - title: Compaction OPS per Instance
+          type: timeseries
+          description: Compaction OPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{ instance }}]-[{{pod}}]'
+        - title: Compaction P99 per Instance by Stage
+          type: timeseries
+          description: Compaction latency by stage
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
+        - title: Compaction P99 per Instance
+          type: timeseries
+          description: Compaction P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction'
+        - title: WAL write size
+          type: timeseries
+          description: Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate.
+          unit: bytes
+          queries:
+            - expr: histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p95'
+            - expr: histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-req-size-p99'
+            - expr: sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-throughput'
+        - title: Cached Bytes per Instance
+          type: timeseries
+          description: Cached Bytes per Instance.
+          unit: decbytes
+          queries:
+            - expr: greptime_mito_cache_bytes{}
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
+        - title: Inflight Compaction
+          type: timeseries
+          description: Ongoing compaction task count
+          unit: none
+          queries:
+            - expr: greptime_mito_inflight_compaction_count
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: WAL sync duration seconds
+          type: timeseries
+          description: Raft engine (local disk) log store sync latency, p99
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(le, type, node, instance, pod) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-p99'
+        - title: Log Store op duration seconds
+          type: timeseries
+          description: Write-ahead log operations latency at p99
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99'
+        - title: Inflight Flush
+          type: timeseries
+          description: Ongoing flush task count
+          unit: none
+          queries:
+            - expr: greptime_mito_inflight_flush_count
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+    - title: OpenDAL
+      panels:
+        - title: QPS per Instance
+          type: timeseries
+          description: QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
+        - title: Read QPS per Instance
+          type: timeseries
+          description: Read QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
+        - title: Read P99 per Instance
+          type: timeseries
+          description: Read P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
+        - title: Write QPS per Instance
+          type: timeseries
+          description: Write QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
+        - title: Write P99 per Instance
+          type: timeseries
+          description: Write P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
+        - title: List QPS per Instance
+          type: timeseries
+          description: List QPS per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="list"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
+        - title: List P99 per Instance
+          type: timeseries
+          description: List P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="list"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
+        - title: Other Requests per Instance
+          type: timeseries
+          description: Other Requests per Instance.
+          unit: ops
+          queries:
+            - expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{operation!~"read|write|list|stat"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
+        - title: Other Request P99 per Instance
+          type: timeseries
+          description: Other Request P99 per Instance.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read|write|list"}[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
+        - title: Opendal traffic
+          type: timeseries
+          description: Total traffic as in bytes by instance and operation
+          unit: decbytes
+          queries:
+            - expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
+        - title: OpenDAL errors per Instance
+          type: timeseries
+          description: OpenDAL error counts per Instance.
+          queries:
+            - expr: sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{ error!="NotFound"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
+    - title: Metasrv
+      panels:
+        - title: Region migration datanode
+          type: state-timeline
+          description: Counter of region migration by source and destination
+          unit: none
+          queries:
+            - expr: greptime_meta_region_migration_stat{datanode_type="src"}
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: from-datanode-{{datanode_id}}
+            - expr: greptime_meta_region_migration_stat{datanode_type="desc"}
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: to-datanode-{{datanode_id}}
+        - title: Region migration error
+          type: timeseries
+          description: Counter of region migration error
+          unit: none
+          queries:
+            - expr: greptime_meta_region_migration_error
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: __auto
+        - title: Datanode load
+          type: timeseries
+          description: Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads.
+          unit: none
+          queries:
+            - expr: greptime_datanode_load
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: __auto
+    - title: Flownode
+      panels:
+        - title: Flow Ingest / Output Rate
+          type: timeseries
+          description: Flow Ingest / Output Rate.
+          queries:
+            - expr: sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{pod}}]-[{{instance}}]-[{{direction}}]'
+        - title: Flow Ingest Latency
+          type: timeseries
+          description: Flow Ingest Latency.
+          queries:
+            - expr: histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-p95'
+            - expr: histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-p99'
+        - title: Flow Operation Latency
+          type: timeseries
+          description: Flow Operation Latency.
+          queries:
+            - expr: histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p95'
+            - expr: histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]-p99'
+        - title: Flow Buffer Size per Instance
+          type: timeseries
+          description: Flow Buffer Size per Instance.
+          queries:
+            - expr: greptime_flow_input_buf_size
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]'
+        - title: Flow Processing Error per Instance
+          type: timeseries
+          description: Flow Processing Error per Instance.
+          queries:
+            - expr: sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'
--- a/grafana/greptimedb-cluster.json
+++ b/grafana/greptimedb-cluster.json
--- a/grafana/greptimedb.json
+++ b/grafana/greptimedb.json
--- a/grafana/scripts/check.sh
+++ b/grafana/scripts/check.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+#
+# Sanity checks for the Grafana dashboards, run in this order:
+#   1. the generated dashboards are up to date with their sources;
+#   2. every 'stat'/'timeseries' panel has a non-empty description;
+#   3. every prometheus/mysql datasource uid uses the expected variable.
+#
+# Usage: check.sh [DASHBOARD_DIR]   (defaults to grafana/dashboards)
+
+DASHBOARD_DIR=${1:-grafana/dashboards}
+
+# Exits with 1 when a 'stat' or 'timeseries' panel lacks a description.
+check_dashboard_description() {
+  local dashboard invalid_panels
+  # Read the file list line by line so paths are not word-split.
+  while IFS= read -r dashboard; do
+    echo "Checking $dashboard description"
+
+    # Use jq to check for panels with empty or missing descriptions.
+    # "stat" is the Grafana panel type id; "stats" never matches any panel.
+    invalid_panels=$(jq -r '
+      .panels[]
+    | select((.type == "stat" or .type == "timeseries") and (.description == "" or .description == null))' "$dashboard")
+
+    # Check if any invalid panels were found
+    if [[ -n "$invalid_panels" ]]; then
+      echo "Error: The following panels have empty or missing descriptions:"
+      echo "$invalid_panels"
+      exit 1
+    else
+      echo "All panels with type 'stat' or 'timeseries' have valid descriptions."
+    fi
+  done < <(find "$DASHBOARD_DIR" -name "*.json")
+}
+
+# Regenerates the dashboards and exits with 1 when the result differs from git.
+check_dashboards_generation() {
+  if ! ./grafana/scripts/gen-dashboards.sh; then
+    echo "Error: Failed to generate the dashboards."
+    exit 1
+  fi
+
+  if [[ -n "$(git diff --name-only grafana/dashboards)" ]]; then
+    # Single quotes: backticks inside double quotes would execute the command.
+    echo 'Error: The dashboards are not generated correctly. You should execute the `make dashboards` command.'
+    exit 1
+  fi
+}
+
+# Exits with 1 when a prometheus/mysql datasource uid is not the expected variable.
+check_datasource() {
+  local dashboard type uid
+  while IFS= read -r dashboard; do
+    echo "Checking $dashboard datasource"
+    # Feed the loop via process substitution rather than a pipe: a piped loop runs
+    # in a subshell, where `exit 1` would not stop this script.
+    while read -r type uid; do
+      # if the datasource is prometheus, check if the uid is ${metrics}
+      if [[ "$type" == "prometheus" && "$uid" != "\${metrics}" ]]; then
+        echo "Error: The datasource uid of $dashboard is not valid. It should be \${metrics}, got $uid"
+        exit 1
+      fi
+      # if the datasource is mysql, check if the uid is ${information_schema}
+      if [[ "$type" == "mysql" && "$uid" != "\${information_schema}" ]]; then
+        echo "Error: The datasource uid of $dashboard is not valid. It should be \${information_schema}, got $uid"
+        exit 1
+      fi
+    done < <(jq -r '.panels[] | select(.type != "row") | .targets[] | [.datasource.type, .datasource.uid] | @tsv' "$dashboard")
+  done < <(find "$DASHBOARD_DIR" -name "*.json")
+}
+
+check_dashboards_generation
+check_dashboard_description
+check_datasource
--- a/grafana/scripts/gen-dashboards.sh
+++ b/grafana/scripts/gen-dashboards.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+#
+# Derives the standalone dashboard from the cluster dashboard, then runs the dac
+# image on both to produce the dashboard.yaml and dashboard.md files.
+#
+# Usage: gen-dashboards.sh [CLUSTER_DASHBOARD_DIR] [STANDALONE_DASHBOARD_DIR]
+
+# Abort on the first failing command so a failed step is reported to the caller.
+set -euo pipefail
+
+CLUSTER_DASHBOARD_DIR=${1:-grafana/dashboards/cluster}
+STANDALONE_DASHBOARD_DIR=${2:-grafana/dashboards/standalone}
+DAC_IMAGE=ghcr.io/zyy17/dac:20250423-522bd35
+
+remove_instance_filters() {
+  # Remove the instance filters for the standalone dashboards.
+  # The `g` flag drops every filter on a line, not just the first occurrence.
+  sed 's/instance=~\\"$datanode\\",//g; s/instance=~\\"$datanode\\"//g; s/instance=~\\"$frontend\\",//g; s/instance=~\\"$frontend\\"//g; s/instance=~\\"$metasrv\\",//g; s/instance=~\\"$metasrv\\"//g; s/instance=~\\"$flownode\\",//g; s/instance=~\\"$flownode\\"//g;' "$CLUSTER_DASHBOARD_DIR/dashboard.json" > "$STANDALONE_DASHBOARD_DIR/dashboard.json"
+}
+
+generate_intermediate_dashboards_and_docs() {
+  local dir
+  # Run the same conversion for the cluster and the standalone dashboard.
+  for dir in "$CLUSTER_DASHBOARD_DIR" "$STANDALONE_DASHBOARD_DIR"; do
+    docker run -v "${PWD}:/greptimedb" --rm "${DAC_IMAGE}" \
+      -i "/greptimedb/$dir/dashboard.json" \
+      -o "/greptimedb/$dir/dashboard.yaml" \
+      -m "/greptimedb/$dir/dashboard.md"
+  done
+}
+
+remove_instance_filters
+generate_intermediate_dashboards_and_docs
--- a/grafana/summary.sh
+++ b/grafana/summary.sh
@@ -1,11 +0,0 @@
-#!/usr/bin/env bash
-
-BASEDIR=$(dirname "$0")
-echo '| Title | Description | Expressions |
-|---|---|---|'
-
-cat $BASEDIR/greptimedb-cluster.json | jq -r '
-  .panels |
-  map(select(.type == "stat" or .type == "timeseries")) |
-  .[] | "| \(.title) | \(.description | gsub("\n"; "<br>")) | \(.targets | map(.expr // .rawSql | "`\(.|gsub("\n"; "<br>"))`")  | join("<br>")) |"
-'
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,2 +1,2 @@
 [toolchain]
-channel = "nightly-2024-12-25"
+channel = "nightly-2025-04-15"
--- a/src/api/src/helper.rs
+++ b/src/api/src/helper.rs
@@ -514,6 +514,7 @@ fn query_request_type(request: &QueryRequest) -> &'static str {
        Some(Query::Sql(_)) => "query.sql",
        Some(Query::LogicalPlan(_)) => "query.logical_plan",
        Some(Query::PromRangeQuery(_)) => "query.prom_range",
+        Some(Query::InsertIntoPlan(_)) => "query.insert_into_plan",
        None => "query.empty",
    }
 }
--- a/src/catalog/src/system_schema/information_schema/key_column_usage.rs
+++ b/src/catalog/src/system_schema/information_schema/key_column_usage.rs
@@ -24,7 +24,7 @@ use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatch
 use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
 use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
 use datatypes::prelude::{ConcreteDataType, MutableVector, ScalarVectorBuilder, VectorRef};
-use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
+use datatypes::schema::{ColumnSchema, FulltextBackend, Schema, SchemaRef};
 use datatypes::value::Value;
 use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, UInt32VectorBuilder};
 use futures_util::TryStreamExt;
@@ -47,20 +47,38 @@ pub const TABLE_SCHEMA: &str = "table_schema";
 pub const TABLE_NAME: &str = "table_name";
 pub const COLUMN_NAME: &str = "column_name";
 pub const ORDINAL_POSITION: &str = "ordinal_position";
+/// The type of the index.
+pub const GREPTIME_INDEX_TYPE: &str = "greptime_index_type";
 const INIT_CAPACITY: usize = 42;

-/// Primary key constraint name
-pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY";
 /// Time index constraint name
-pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX";
+pub(crate) const CONSTRAINT_NAME_TIME_INDEX: &str = "TIME INDEX";
+
+/// Primary key constraint name
+pub(crate) const CONSTRAINT_NAME_PRI: &str = "PRIMARY";
+/// Primary key index type
+pub(crate) const INDEX_TYPE_PRI: &str = "greptime-primary-key-v1";
+
 /// Inverted index constraint name
-pub(crate) const INVERTED_INDEX_CONSTRAINT_NAME: &str = "INVERTED INDEX";
+pub(crate) const CONSTRAINT_NAME_INVERTED_INDEX: &str = "INVERTED INDEX";
+/// Inverted index type
+pub(crate) const INDEX_TYPE_INVERTED_INDEX: &str = "greptime-inverted-index-v1";
+
 /// Fulltext index constraint name
-pub(crate) const FULLTEXT_INDEX_CONSTRAINT_NAME: &str = "FULLTEXT INDEX";
+pub(crate) const CONSTRAINT_NAME_FULLTEXT_INDEX: &str = "FULLTEXT INDEX";
+/// Fulltext index v1 type
+pub(crate) const INDEX_TYPE_FULLTEXT_TANTIVY: &str = "greptime-fulltext-index-v1";
+/// Fulltext index bloom type
+pub(crate) const INDEX_TYPE_FULLTEXT_BLOOM: &str = "greptime-fulltext-index-bloom";
+
 /// Skipping index constraint name
-pub(crate) const SKIPPING_INDEX_CONSTRAINT_NAME: &str = "SKIPPING INDEX";
+pub(crate) const CONSTRAINT_NAME_SKIPPING_INDEX: &str = "SKIPPING INDEX";
+/// Skipping index type
+pub(crate) const INDEX_TYPE_SKIPPING_INDEX: &str = "greptime-bloom-filter-v1";

 /// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
+///
+/// Provides an extra column `greptime_index_type` for the index type of the key column.
 #[derive(Debug)]
 pub(super) struct InformationSchemaKeyColumnUsage {
    schema: SchemaRef,
@@ -120,6 +138,11 @@ impl InformationSchemaKeyColumnUsage {
                ConcreteDataType::string_datatype(),
                true,
            ),
+            ColumnSchema::new(
+                GREPTIME_INDEX_TYPE,
+                ConcreteDataType::string_datatype(),
+                true,
+            ),
        ]))
    }

@@ -184,6 +207,7 @@ struct InformationSchemaKeyColumnUsageBuilder {
    column_name: StringVectorBuilder,
    ordinal_position: UInt32VectorBuilder,
    position_in_unique_constraint: UInt32VectorBuilder,
+    greptime_index_type: StringVectorBuilder,
 }

 impl InformationSchemaKeyColumnUsageBuilder {
@@ -206,6 +230,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
            column_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            ordinal_position: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
            position_in_unique_constraint: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
+            greptime_index_type: StringVectorBuilder::with_capacity(INIT_CAPACITY),
        }
    }

@@ -229,34 +254,47 @@ impl InformationSchemaKeyColumnUsageBuilder {

                for (idx, column) in schema.column_schemas().iter().enumerate() {
                    let mut constraints = vec![];
+                    let mut greptime_index_type = vec![];
                    if column.is_time_index() {
                        self.add_key_column_usage(
                            &predicates,
                            &schema_name,
-                            TIME_INDEX_CONSTRAINT_NAME,
+                            CONSTRAINT_NAME_TIME_INDEX,
                            &catalog_name,
                            &schema_name,
                            table_name,
                            &column.name,
                            1, //always 1 for time index
+                            "",
                        );
                    }
                    // TODO(dimbtp): foreign key constraint not supported yet
                    if keys.contains(&idx) {
-                        constraints.push(PRI_CONSTRAINT_NAME);
+                        constraints.push(CONSTRAINT_NAME_PRI);
+                        greptime_index_type.push(INDEX_TYPE_PRI);
                    }
                    if column.is_inverted_indexed() {
-                        constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);
+                        constraints.push(CONSTRAINT_NAME_INVERTED_INDEX);
+                        greptime_index_type.push(INDEX_TYPE_INVERTED_INDEX);
                    }
-                    if column.is_fulltext_indexed() {
-                        constraints.push(FULLTEXT_INDEX_CONSTRAINT_NAME);
+                    if let Ok(Some(options)) = column.fulltext_options() {
+                        if options.enable {
+                            constraints.push(CONSTRAINT_NAME_FULLTEXT_INDEX);
+                            let index_type = match options.backend {
+                                FulltextBackend::Bloom => INDEX_TYPE_FULLTEXT_BLOOM,
+                                FulltextBackend::Tantivy => INDEX_TYPE_FULLTEXT_TANTIVY,
+                            };
+                            greptime_index_type.push(index_type);
+                        }
                    }
                    if column.is_skipping_indexed() {
-                        constraints.push(SKIPPING_INDEX_CONSTRAINT_NAME);
+                        constraints.push(CONSTRAINT_NAME_SKIPPING_INDEX);
+                        greptime_index_type.push(INDEX_TYPE_SKIPPING_INDEX);
                    }

                    if !constraints.is_empty() {
                        let aggregated_constraints = constraints.join(", ");
+                        let aggregated_index_types = greptime_index_type.join(", ");
                        self.add_key_column_usage(
                            &predicates,
                            &schema_name,
@@ -266,6 +304,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
                            table_name,
                            &column.name,
                            idx as u32 + 1,
+                            &aggregated_index_types,
                        );
                    }
                }
@@ -288,6 +327,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
        table_name: &str,
        column_name: &str,
        ordinal_position: u32,
+        index_types: &str,
    ) {
        let row = [
            (CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
@@ -297,6 +337,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
            (TABLE_NAME, &Value::from(table_name)),
            (COLUMN_NAME, &Value::from(column_name)),
            (ORDINAL_POSITION, &Value::from(ordinal_position)),
+            (GREPTIME_INDEX_TYPE, &Value::from(index_types)),
        ];

        if !predicates.eval(&row) {
@@ -313,6 +354,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
        self.column_name.push(Some(column_name));
        self.ordinal_position.push(Some(ordinal_position));
        self.position_in_unique_constraint.push(None);
+        self.greptime_index_type.push(Some(index_types));
    }

    fn finish(&mut self) -> Result<RecordBatch> {
@@ -336,6 +378,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
            null_string_vector.clone(),
            null_string_vector.clone(),
            null_string_vector,
+            Arc::new(self.greptime_index_type.finish()),
        ];
        RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
    }
--- a/src/catalog/src/system_schema/information_schema/table_constraints.rs
+++ b/src/catalog/src/system_schema/information_schema/table_constraints.rs
@@ -36,7 +36,7 @@ use crate::error::{
    CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
 };
 use crate::information_schema::key_column_usage::{
-    PRI_CONSTRAINT_NAME, TIME_INDEX_CONSTRAINT_NAME,
+    CONSTRAINT_NAME_PRI, CONSTRAINT_NAME_TIME_INDEX,
 };
 use crate::information_schema::Predicates;
 use crate::system_schema::information_schema::{InformationTable, TABLE_CONSTRAINTS};
@@ -188,7 +188,7 @@ impl InformationSchemaTableConstraintsBuilder {
                    self.add_table_constraint(
                        &predicates,
                        &schema_name,
-                        TIME_INDEX_CONSTRAINT_NAME,
+                        CONSTRAINT_NAME_TIME_INDEX,
                        &schema_name,
                        &table.table_info().name,
                        TIME_INDEX_CONSTRAINT_TYPE,
@@ -199,7 +199,7 @@ impl InformationSchemaTableConstraintsBuilder {
                    self.add_table_constraint(
                        &predicates,
                        &schema_name,
-                        PRI_CONSTRAINT_NAME,
+                        CONSTRAINT_NAME_PRI,
                        &schema_name,
                        &table.table_info().name,
                        PRI_KEY_CONSTRAINT_TYPE,
--- a/src/catalog/src/system_schema/pg_catalog/pg_namespace/oid_map.rs
+++ b/src/catalog/src/system_schema/pg_catalog/pg_namespace/oid_map.rs
@@ -84,12 +84,6 @@ mod tests {
        let key1 = "3178510";
        let key2 = "4215648";

-        // have collision
-        assert_eq!(
-            oid_map.hasher.hash_one(key1) as u32,
-            oid_map.hasher.hash_one(key2) as u32
-        );
-
        // insert them into oid_map
        let oid1 = oid_map.get_oid(key1);
        let oid2 = oid_map.get_oid(key2);
--- a/src/catalog/src/table_source.rs
+++ b/src/catalog/src/table_source.rs
@@ -27,7 +27,7 @@ use session::context::QueryContextRef;
 use snafu::{ensure, OptionExt, ResultExt};
 use table::metadata::TableType;
 use table::table::adapter::DfTableProviderAdapter;
-mod dummy_catalog;
+pub mod dummy_catalog;
 use dummy_catalog::DummyCatalogList;
 use table::TableRef;

--- a/src/cli/src/error.rs
+++ b/src/cli/src/error.rs
@@ -17,7 +17,6 @@ use std::any::Any;
 use common_error::ext::{BoxedError, ErrorExt};
 use common_error::status_code::StatusCode;
 use common_macro::stack_trace_debug;
-use rustyline::error::ReadlineError;
 use snafu::{Location, Snafu};

 #[derive(Snafu)]
@@ -105,52 +104,6 @@ pub enum Error {
    #[snafu(display("Invalid REPL command: {reason}"))]
    InvalidReplCommand { reason: String },

-    #[snafu(display("Cannot create REPL"))]
-    ReplCreation {
-        #[snafu(source)]
-        error: ReadlineError,
-        #[snafu(implicit)]
-        location: Location,
-    },
-
-    #[snafu(display("Error reading command"))]
-    Readline {
-        #[snafu(source)]
-        error: ReadlineError,
-        #[snafu(implicit)]
-        location: Location,
-    },
-
-    #[snafu(display("Failed to request database, sql: {sql}"))]
-    RequestDatabase {
-        sql: String,
-        #[snafu(source)]
-        source: client::Error,
-        #[snafu(implicit)]
-        location: Location,
-    },
-
-    #[snafu(display("Failed to collect RecordBatches"))]
-    CollectRecordBatches {
-        #[snafu(implicit)]
-        location: Location,
-        source: common_recordbatch::error::Error,
-    },
-
-    #[snafu(display("Failed to pretty print Recordbatches"))]
-    PrettyPrintRecordBatches {
-        #[snafu(implicit)]
-        location: Location,
-        source: common_recordbatch::error::Error,
-    },
-
-    #[snafu(display("Failed to start Meta client"))]
-    StartMetaClient {
-        #[snafu(implicit)]
-        location: Location,
-        source: meta_client::error::Error,
-    },
-
    #[snafu(display("Failed to parse SQL: {}", sql))]
    ParseSql {
        sql: String,
@@ -166,13 +119,6 @@ pub enum Error {
        source: query::error::Error,
    },

-    #[snafu(display("Failed to encode logical plan in substrait"))]
-    SubstraitEncodeLogicalPlan {
-        #[snafu(implicit)]
-        location: Location,
-        source: substrait::error::Error,
-    },
-
    #[snafu(display("Failed to load layered config"))]
    LoadLayeredConfig {
        #[snafu(source(from(common_config::error::Error, Box::new)))]
@@ -318,17 +264,10 @@ impl ErrorExt for Error {
            Error::StartProcedureManager { source, .. }
            | Error::StopProcedureManager { source, .. } => source.status_code(),
            Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
-            Error::ReplCreation { .. } | Error::Readline { .. } | Error::HttpQuerySql { .. } => {
-                StatusCode::Internal
-            }
-            Error::RequestDatabase { source, .. } => source.status_code(),
-            Error::CollectRecordBatches { source, .. }
-            | Error::PrettyPrintRecordBatches { source, .. } => source.status_code(),
-            Error::StartMetaClient { source, .. } => source.status_code(),
+            Error::HttpQuerySql { .. } => StatusCode::Internal,
            Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => {
                source.status_code()
            }
-            Error::SubstraitEncodeLogicalPlan { source, .. } => source.status_code(),

            Error::SerdeJson { .. }
            | Error::FileIo { .. }
--- a/src/cli/src/lib.rs
+++ b/src/cli/src/lib.rs
@@ -23,15 +23,12 @@ mod helper;
 // Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373
 mod database;
 mod import;
-#[allow(unused)]
-mod repl;

 use async_trait::async_trait;
 use clap::Parser;
 use common_error::ext::BoxedError;
 pub use database::DatabaseClient;
 use error::Result;
-pub use repl::Repl;

 pub use crate::bench::BenchTableMetadataCommand;
 pub use crate::export::ExportCommand;
--- a/src/cli/src/repl.rs
+++ b/src/cli/src/repl.rs
@@ -1,299 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::path::PathBuf;
-use std::sync::Arc;
-use std::time::Instant;
-
-use cache::{
-    build_fundamental_cache_registry, with_default_composite_cache_registry, TABLE_CACHE_NAME,
-    TABLE_ROUTE_CACHE_NAME,
-};
-use catalog::information_extension::DistributedInformationExtension;
-use catalog::kvbackend::{
-    CachedKvBackend, CachedKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend,
-};
-use client::{Client, Database, OutputData, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
-use common_base::Plugins;
-use common_config::Mode;
-use common_error::ext::ErrorExt;
-use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
-use common_meta::kv_backend::KvBackendRef;
-use common_query::Output;
-use common_recordbatch::RecordBatches;
-use common_telemetry::debug;
-use either::Either;
-use meta_client::client::{ClusterKvBackend, MetaClientBuilder};
-use query::datafusion::DatafusionQueryEngine;
-use query::parser::QueryLanguageParser;
-use query::query_engine::{DefaultSerializer, QueryEngineState};
-use query::QueryEngine;
-use rustyline::error::ReadlineError;
-use rustyline::Editor;
-use session::context::QueryContext;
-use snafu::{OptionExt, ResultExt};
-use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
-
-use crate::cmd::ReplCommand;
-use crate::error::{
-    CollectRecordBatchesSnafu, ParseSqlSnafu, PlanStatementSnafu, PrettyPrintRecordBatchesSnafu,
-    ReadlineSnafu, ReplCreationSnafu, RequestDatabaseSnafu, Result, StartMetaClientSnafu,
-    SubstraitEncodeLogicalPlanSnafu,
-};
-use crate::helper::RustylineHelper;
-use crate::{error, AttachCommand};
-
-/// Captures the state of the repl, gathers commands and executes them one by one
-pub struct Repl {
-    /// Rustyline editor for interacting with user on command line
-    rl: Editor<RustylineHelper>,
-
-    /// Current prompt
-    prompt: String,
-
-    /// Client for interacting with GreptimeDB
-    database: Database,
-
-    query_engine: Option<DatafusionQueryEngine>,
-}
-
-#[allow(clippy::print_stdout)]
-impl Repl {
-    fn print_help(&self) {
-        println!("{}", ReplCommand::help())
-    }
-
-    pub(crate) async fn try_new(cmd: &AttachCommand) -> Result<Self> {
-        let mut rl = Editor::new().context(ReplCreationSnafu)?;
-
-        if !cmd.disable_helper {
-            rl.set_helper(Some(RustylineHelper::default()));
-
-            let history_file = history_file();
-            if let Err(e) = rl.load_history(&history_file) {
-                debug!(
-                    "failed to load history file on {}, error: {e}",
-                    history_file.display()
-                );
-            }
-        }
-
-        let client = Client::with_urls([&cmd.grpc_addr]);
-        let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
-
-        let query_engine = if let Some(meta_addr) = &cmd.meta_addr {
-            create_query_engine(meta_addr).await.map(Some)?
-        } else {
-            None
-        };
-
-        Ok(Self {
-            rl,
-            prompt: "> ".to_string(),
-            database,
-            query_engine,
-        })
-    }
-
-    /// Parse the next command
-    fn next_command(&mut self) -> Result<ReplCommand> {
-        match self.rl.readline(&self.prompt) {
-            Ok(ref line) => {
-                let request = line.trim();
-
-                let _ = self.rl.add_history_entry(request.to_string());
-
-                request.try_into()
-            }
-            Err(ReadlineError::Eof) | Err(ReadlineError::Interrupted) => Ok(ReplCommand::Exit),
-            // Some sort of real underlying error
-            Err(e) => Err(e).context(ReadlineSnafu),
-        }
-    }
-
-    /// Read Evaluate Print Loop (interactive command line) for GreptimeDB
-    ///
-    /// Inspired / based on repl.rs from InfluxDB IOX
-    pub(crate) async fn run(&mut self) -> Result<()> {
-        println!("Ready for commands. (Hint: try 'help')");
-
-        loop {
-            match self.next_command()? {
-                ReplCommand::Help => {
-                    self.print_help();
-                }
-                ReplCommand::UseDatabase { db_name } => {
-                    if self.execute_sql(format!("USE {db_name}")).await {
-                        println!("Using {db_name}");
-                        self.database.set_schema(&db_name);
-                        self.prompt = format!("[{db_name}] > ");
-                    }
-                }
-                ReplCommand::Sql { sql } => {
-                    let _ = self.execute_sql(sql).await;
-                }
-                ReplCommand::Exit => {
-                    return Ok(());
-                }
-            }
-        }
-    }
-
-    async fn execute_sql(&self, sql: String) -> bool {
-        self.do_execute_sql(sql)
-            .await
-            .map_err(|e| {
-                let status_code = e.status_code();
-                let root_cause = e.output_msg();
-                println!("Error: {}({status_code}), {root_cause}", status_code as u32)
-            })
-            .is_ok()
-    }
-
-    async fn do_execute_sql(&self, sql: String) -> Result<()> {
-        let start = Instant::now();
-
-        let output = if let Some(query_engine) = &self.query_engine {
-            let query_ctx = Arc::new(QueryContext::with(
-                self.database.catalog(),
-                self.database.schema(),
-            ));
-
-            let stmt = QueryLanguageParser::parse_sql(&sql, &query_ctx)
-                .with_context(|_| ParseSqlSnafu { sql: sql.clone() })?;
-
-            let plan = query_engine
-                .planner()
-                .plan(&stmt, query_ctx.clone())
-                .await
-                .context(PlanStatementSnafu)?;
-
-            let plan = query_engine
-                .optimize(&query_engine.engine_context(query_ctx), &plan)
-                .context(PlanStatementSnafu)?;
-
-            let plan = DFLogicalSubstraitConvertor {}
-                .encode(&plan, DefaultSerializer)
-                .context(SubstraitEncodeLogicalPlanSnafu)?;
-
-            self.database.logical_plan(plan.to_vec()).await
-        } else {
-            self.database.sql(&sql).await
-        }
-        .context(RequestDatabaseSnafu { sql: &sql })?;
-
-        let either = match output.data {
-            OutputData::Stream(s) => {
-                let x = RecordBatches::try_collect(s)
-                    .await
-                    .context(CollectRecordBatchesSnafu)?;
-                Either::Left(x)
-            }
-            OutputData::RecordBatches(x) => Either::Left(x),
-            OutputData::AffectedRows(rows) => Either::Right(rows),
-        };
-
-        let end = Instant::now();
-
-        match either {
-            Either::Left(recordbatches) => {
-                let total_rows: usize = recordbatches.iter().map(|x| x.num_rows()).sum();
-                if total_rows > 0 {
-                    println!(
-                        "{}",
-                        recordbatches
-                            .pretty_print()
-                            .context(PrettyPrintRecordBatchesSnafu)?
-                    );
-                }
-                println!("Total Rows: {total_rows}")
-            }
-            Either::Right(rows) => println!("Affected Rows: {rows}"),
-        };
-
-        println!("Cost {} ms", (end - start).as_millis());
-        Ok(())
-    }
-}
-
-impl Drop for Repl {
-    fn drop(&mut self) {
-        if self.rl.helper().is_some() {
-            let history_file = history_file();
-            if let Err(e) = self.rl.save_history(&history_file) {
-                debug!(
-                    "failed to save history file on {}, error: {e}",
-                    history_file.display()
-                );
-            }
-        }
-    }
-}
-
-/// Return the location of the history file (defaults to $HOME/".greptimedb_cli_history")
-fn history_file() -> PathBuf {
-    let mut buf = match std::env::var("HOME") {
-        Ok(home) => PathBuf::from(home),
-        Err(_) => PathBuf::new(),
-    };
-    buf.push(".greptimedb_cli_history");
-    buf
-}
-
-async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
-    let mut meta_client = MetaClientBuilder::default().enable_store().build();
-    meta_client
-        .start([meta_addr])
-        .await
-        .context(StartMetaClientSnafu)?;
-    let meta_client = Arc::new(meta_client);
-
-    let cached_meta_backend = Arc::new(
-        CachedKvBackendBuilder::new(Arc::new(MetaKvBackend::new(meta_client.clone()))).build(),
-    );
-    let layered_cache_builder = LayeredCacheRegistryBuilder::default().add_cache_registry(
-        CacheRegistryBuilder::default()
-            .add_cache(cached_meta_backend.clone())
-            .build(),
-    );
-    let fundamental_cache_registry =
-        build_fundamental_cache_registry(Arc::new(MetaKvBackend::new(meta_client.clone())));
-    let layered_cache_registry = Arc::new(
-        with_default_composite_cache_registry(
-            layered_cache_builder.add_cache_registry(fundamental_cache_registry),
-        )
-        .context(error::BuildCacheRegistrySnafu)?
-        .build(),
-    );
-
-    let information_extension = Arc::new(DistributedInformationExtension::new(meta_client.clone()));
-    let catalog_manager = KvBackendCatalogManager::new(
-        information_extension,
-        cached_meta_backend.clone(),
-        layered_cache_registry,
-        None,
-    );
-    let plugins: Plugins = Default::default();
-    let state = Arc::new(QueryEngineState::new(
-        catalog_manager,
-        None,
-        None,
-        None,
-        None,
-        false,
-        plugins.clone(),
-    ));
-
-    Ok(DatafusionQueryEngine::new(state, plugins))
-}
--- a/src/client/Cargo.toml
+++ b/src/client/Cargo.toml
@@ -16,6 +16,7 @@ arc-swap = "1.6"
 arrow-flight.workspace = true
 async-stream.workspace = true
 async-trait.workspace = true
+base64.workspace = true
 common-catalog.workspace = true
 common-error.workspace = true
 common-grpc.workspace = true
@@ -25,6 +26,7 @@ common-query.workspace = true
 common-recordbatch.workspace = true
 common-telemetry.workspace = true
 enum_dispatch = "0.3"
+futures.workspace = true
 futures-util.workspace = true
 lazy_static.workspace = true
 moka = { workspace = true, features = ["future"] }
--- a/src/client/src/database.rs
+++ b/src/client/src/database.rs
@@ -12,36 +12,49 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::pin::Pin;
+use std::str::FromStr;
+
 use api::v1::auth_header::AuthScheme;
 use api::v1::ddl_request::Expr as DdlExpr;
 use api::v1::greptime_database_client::GreptimeDatabaseClient;
 use api::v1::greptime_request::Request;
 use api::v1::query_request::Query;
 use api::v1::{
-    AlterTableExpr, AuthHeader, CreateTableExpr, DdlRequest, GreptimeRequest, InsertRequests,
-    QueryRequest, RequestHeader,
+    AlterTableExpr, AuthHeader, Basic, CreateTableExpr, DdlRequest, GreptimeRequest,
+    InsertRequests, QueryRequest, RequestHeader,
 };
-use arrow_flight::Ticket;
+use arrow_flight::{FlightData, Ticket};
 use async_stream::stream;
+use base64::prelude::BASE64_STANDARD;
+use base64::Engine;
+use common_catalog::build_db_string;
+use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use common_error::ext::{BoxedError, ErrorExt};
+use common_grpc::flight::do_put::DoPutResponse;
 use common_grpc::flight::{FlightDecoder, FlightMessage};
 use common_query::Output;
 use common_recordbatch::error::ExternalSnafu;
 use common_recordbatch::RecordBatchStreamWrapper;
 use common_telemetry::error;
 use common_telemetry::tracing_context::W3cTrace;
-use futures_util::StreamExt;
+use futures::future;
+use futures_util::{Stream, StreamExt, TryStreamExt};
 use prost::Message;
 use snafu::{ensure, ResultExt};
-use tonic::metadata::AsciiMetadataKey;
+use tonic::metadata::{AsciiMetadataKey, MetadataValue};
 use tonic::transport::Channel;

 use crate::error::{
    ConvertFlightDataSnafu, Error, FlightGetSnafu, IllegalFlightMessagesSnafu, InvalidAsciiSnafu,
-    ServerSnafu,
+    InvalidTonicMetadataValueSnafu, ServerSnafu,
 };
 use crate::{from_grpc_response, Client, Result};

+type FlightDataStream = Pin<Box<dyn Stream<Item = FlightData> + Send>>;
+
+type DoPutResponseStream = Pin<Box<dyn Stream<Item = Result<DoPutResponse>>>>;
+
 #[derive(Clone, Debug, Default)]
 pub struct Database {
    // The "catalog" and "schema" to be used in processing the requests at the server side.
@@ -108,16 +121,24 @@ impl Database {
        self.catalog = catalog.into();
    }

-    pub fn catalog(&self) -> &String {
-        &self.catalog
+    fn catalog_or_default(&self) -> &str { // the configured catalog, or the default when none is set
+        if self.catalog.is_empty() {
+            DEFAULT_CATALOG_NAME // an empty string is treated as "not set"
+        } else {
+            &self.catalog
+        }
     }

    pub fn set_schema(&mut self, schema: impl Into<String>) {
        self.schema = schema.into();
    }

-    pub fn schema(&self) -> &String {
-        &self.schema
+    fn schema_or_default(&self) -> &str { // the configured schema, or the default when none is set
+        if self.schema.is_empty() {
+            DEFAULT_SCHEMA_NAME // an empty string is treated as "not set"
+        } else {
+            &self.schema
+        }
     }

    pub fn set_timezone(&mut self, timezone: impl Into<String>) {
@@ -310,6 +331,41 @@ impl Database {
            }
        }
    }
+
+    /// Ingest a stream of [RecordBatch]es that belong to a table, using Arrow Flight's "`DoPut`"
+    /// method. The return value is also a stream, which yields [DoPutResponse]s (or errors).
+    pub async fn do_put(&self, stream: FlightDataStream) -> Result<DoPutResponseStream> {
+        let mut request = tonic::Request::new(stream);
+
+        if let Some(AuthHeader {
+            auth_scheme: Some(AuthScheme::Basic(Basic { username, password })),
+        }) = &self.ctx.auth_header
+        { // Only Basic auth is forwarded: base64("username:password") goes into request metadata.
+            let encoded = BASE64_STANDARD.encode(format!("{username}:{password}"));
+            let value =
+                MetadataValue::from_str(&encoded).context(InvalidTonicMetadataValueSnafu)?;
+            request.metadata_mut().insert("x-greptime-auth", value);
+        }
+
+        let db_to_put = if !self.dbname.is_empty() { // an explicitly set dbname takes precedence
+            &self.dbname
+        } else { // otherwise build the name from catalog and schema, falling back to the defaults
+            &build_db_string(self.catalog_or_default(), self.schema_or_default())
+        };
+        request.metadata_mut().insert(
+            "x-greptime-db-name",
+            MetadataValue::from_str(db_to_put).context(InvalidTonicMetadataValueSnafu)?,
+        );
+
+        let mut client = self.client.make_flight_client()?;
+        let response = client.mut_inner().do_put(request).await?;
+        let response = response
+            .into_inner()
+            .map_err(Into::into) // convert the stream's error type via `Into`
+            .and_then(|x| future::ready(DoPutResponse::try_from(x).context(ConvertFlightDataSnafu)))
+            .boxed(); // box the adapted stream so it can be returned as `DoPutResponseStream`
+        Ok(response)
+    }
 }

 #[derive(Default, Debug, Clone)]
--- a/src/client/src/error.rs
+++ b/src/client/src/error.rs
@@ -19,6 +19,7 @@ use common_error::status_code::{convert_tonic_code_to_status_code, StatusCode};
 use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG};
 use common_macro::stack_trace_debug;
 use snafu::{location, Location, Snafu};
+use tonic::metadata::errors::InvalidMetadataValue;
 use tonic::{Code, Status};

 #[derive(Snafu)]
@@ -115,6 +116,14 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },
+
+    #[snafu(display("Invalid Tonic metadata value"))]
+    InvalidTonicMetadataValue {
+        #[snafu(source)]
+        error: InvalidMetadataValue,
+        #[snafu(implicit)]
+        location: Location,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -135,7 +144,9 @@ impl ErrorExt for Error {
            | Error::CreateTlsChannel { source, .. } => source.status_code(),
            Error::IllegalGrpcClientState { .. } => StatusCode::Unexpected,

-            Error::InvalidAscii { .. } => StatusCode::InvalidArguments,
+            Error::InvalidAscii { .. } | Error::InvalidTonicMetadataValue { .. } => {
+                StatusCode::InvalidArguments
+            }
        }
    }

--- a/src/client/src/lib.rs
+++ b/src/client/src/lib.rs
@@ -16,7 +16,7 @@

 mod client;
 pub mod client_manager;
-mod database;
+pub mod database;
 pub mod error;
 pub mod flow;
 pub mod load_balance;
--- a/src/cmd/Cargo.toml
+++ b/src/cmd/Cargo.toml
@@ -68,7 +68,6 @@ query.workspace = true
 rand.workspace = true
 regex.workspace = true
 reqwest.workspace = true
-rustyline = "10.1"
 serde.workspace = true
 serde_json.workspace = true
 servers.workspace = true
--- a/src/cmd/src/error.rs
+++ b/src/cmd/src/error.rs
@@ -17,7 +17,6 @@ use std::any::Any;
 use common_error::ext::{BoxedError, ErrorExt};
 use common_error::status_code::StatusCode;
 use common_macro::stack_trace_debug;
-use rustyline::error::ReadlineError;
 use snafu::{Location, Snafu};

 #[derive(Snafu)]
@@ -181,52 +180,6 @@ pub enum Error {
    #[snafu(display("Invalid REPL command: {reason}"))]
    InvalidReplCommand { reason: String },

-    #[snafu(display("Cannot create REPL"))]
-    ReplCreation {
-        #[snafu(source)]
-        error: ReadlineError,
-        #[snafu(implicit)]
-        location: Location,
-    },
-
-    #[snafu(display("Error reading command"))]
-    Readline {
-        #[snafu(source)]
-        error: ReadlineError,
-        #[snafu(implicit)]
-        location: Location,
-    },
-
-    #[snafu(display("Failed to request database, sql: {sql}"))]
-    RequestDatabase {
-        sql: String,
-        #[snafu(source)]
-        source: client::Error,
-        #[snafu(implicit)]
-        location: Location,
-    },
-
-    #[snafu(display("Failed to collect RecordBatches"))]
-    CollectRecordBatches {
-        #[snafu(implicit)]
-        location: Location,
-        source: common_recordbatch::error::Error,
-    },
-
-    #[snafu(display("Failed to pretty print Recordbatches"))]
-    PrettyPrintRecordBatches {
-        #[snafu(implicit)]
-        location: Location,
-        source: common_recordbatch::error::Error,
-    },
-
-    #[snafu(display("Failed to start Meta client"))]
-    StartMetaClient {
-        #[snafu(implicit)]
-        location: Location,
-        source: meta_client::error::Error,
-    },
-
    #[snafu(display("Failed to parse SQL: {}", sql))]
    ParseSql {
        sql: String,
@@ -242,13 +195,6 @@ pub enum Error {
        source: query::error::Error,
    },

-    #[snafu(display("Failed to encode logical plan in substrait"))]
-    SubstraitEncodeLogicalPlan {
-        #[snafu(implicit)]
-        location: Location,
-        source: substrait::error::Error,
-    },
-
    #[snafu(display("Failed to load layered config"))]
    LoadLayeredConfig {
        #[snafu(source(from(common_config::error::Error, Box::new)))]
@@ -395,17 +341,10 @@ impl ErrorExt for Error {
            | Error::StopProcedureManager { source, .. } => source.status_code(),
            Error::BuildWalOptionsAllocator { source, .. }
            | Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
-            Error::ReplCreation { .. } | Error::Readline { .. } | Error::HttpQuerySql { .. } => {
-                StatusCode::Internal
-            }
-            Error::RequestDatabase { source, .. } => source.status_code(),
-            Error::CollectRecordBatches { source, .. }
-            | Error::PrettyPrintRecordBatches { source, .. } => source.status_code(),
-            Error::StartMetaClient { source, .. } => source.status_code(),
+            Error::HttpQuerySql { .. } => StatusCode::Internal,
            Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => {
                source.status_code()
            }
-            Error::SubstraitEncodeLogicalPlan { source, .. } => source.status_code(),

            Error::SerdeJson { .. }
            | Error::FileIo { .. }
--- a/src/cmd/src/flownode.rs
+++ b/src/cmd/src/flownode.rs
@@ -32,7 +32,9 @@ use common_meta::key::TableMetadataManager;
 use common_telemetry::info;
 use common_telemetry::logging::TracingOptions;
 use common_version::{short_version, version};
-use flow::{FlownodeBuilder, FlownodeInstance, FlownodeServiceBuilder, FrontendInvoker};
+use flow::{
+    FlownodeBuilder, FlownodeInstance, FlownodeServiceBuilder, FrontendClient, FrontendInvoker,
+};
 use meta_client::{MetaClientOptions, MetaClientType};
 use snafu::{ensure, OptionExt, ResultExt};
 use tracing_appender::non_blocking::WorkerGuard;
@@ -313,12 +315,14 @@ impl StartCommand {
        );

        let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
+        let frontend_client = FrontendClient::from_meta_client(meta_client.clone());
        let flownode_builder = FlownodeBuilder::new(
            opts.clone(),
            Plugins::new(),
            table_metadata_manager,
            catalog_manager.clone(),
            flow_metadata_manager,
+            Arc::new(frontend_client),
        )
        .with_heartbeat_task(heartbeat_task);

@@ -341,7 +345,7 @@ impl StartCommand {
        let client = Arc::new(NodeClients::new(channel_config));

        let invoker = FrontendInvoker::build_from(
-            flownode.flow_worker_manager().clone(),
+            flownode.flow_engine().streaming_engine(),
            catalog_manager.clone(),
            cached_meta_backend.clone(),
            layered_cache_registry.clone(),
@@ -351,7 +355,9 @@ impl StartCommand {
        .await
        .context(StartFlownodeSnafu)?;
        flownode
-            .flow_worker_manager()
+            .flow_engine()
+            .streaming_engine()
+            // TODO(discord9): refactor and avoid circular reference
            .set_frontend_invoker(invoker)
            .await;

--- a/src/cmd/src/metasrv.rs
+++ b/src/cmd/src/metasrv.rs
@@ -132,7 +132,7 @@ impl SubCommand {
 }

 #[derive(Debug, Default, Parser)]
-struct StartCommand {
+pub struct StartCommand {
    /// The address to bind the gRPC server.
    #[clap(long, alias = "bind-addr")]
    rpc_bind_addr: Option<String>,
@@ -172,7 +172,7 @@ struct StartCommand {
 }

 impl StartCommand {
-    fn load_options(&self, global_options: &GlobalOptions) -> Result<MetasrvOptions> {
+    pub fn load_options(&self, global_options: &GlobalOptions) -> Result<MetasrvOptions> {
        let mut opts = MetasrvOptions::load_layered_options(
            self.config_file.as_deref(),
            self.env_prefix.as_ref(),
@@ -261,7 +261,7 @@ impl StartCommand {
        Ok(())
    }

-    async fn build(&self, opts: MetasrvOptions) -> Result<Instance> {
+    pub async fn build(&self, opts: MetasrvOptions) -> Result<Instance> {
        common_runtime::init_global_runtimes(&opts.runtime);

        let guard = common_telemetry::init_global_logging(
--- a/src/cmd/src/standalone.rs
+++ b/src/cmd/src/standalone.rs
@@ -56,8 +56,8 @@ use datanode::datanode::{Datanode, DatanodeBuilder};
 use datanode::region_server::RegionServer;
 use file_engine::config::EngineConfig as FileEngineConfig;
 use flow::{
-    FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeInstance, FlownodeOptions,
-    FrontendInvoker,
+    FlowConfig, FlownodeBuilder, FlownodeInstance, FlownodeOptions, FrontendClient,
+    FrontendInvoker, GrpcQueryHandlerWithBoxedError, StreamingEngine,
 };
 use frontend::frontend::{Frontend, FrontendOptions};
 use frontend::instance::builder::FrontendBuilder;
@@ -523,12 +523,18 @@ impl StartCommand {
            flow: opts.flow.clone(),
            ..Default::default()
        };
+
+        // Standalone mode does not use gRPC; instead, get a handle to the frontend gRPC client
+        // without actually making a connection.
+        let (frontend_client, frontend_instance_handler) =
+            FrontendClient::from_empty_grpc_handler();
        let flow_builder = FlownodeBuilder::new(
            flownode_options,
            plugins.clone(),
            table_metadata_manager.clone(),
            catalog_manager.clone(),
            flow_metadata_manager.clone(),
+            Arc::new(frontend_client.clone()),
        );
        let flownode = flow_builder
            .build()
@@ -538,15 +544,15 @@ impl StartCommand {

        // set the ref to query for the local flow state
        {
-            let flow_worker_manager = flownode.flow_worker_manager();
+            let flow_streaming_engine = flownode.flow_engine().streaming_engine();
            information_extension
-                .set_flow_worker_manager(flow_worker_manager.clone())
+                .set_flow_streaming_engine(flow_streaming_engine)
                .await;
        }

        let node_manager = Arc::new(StandaloneDatanodeManager {
            region_server: datanode.region_server(),
-            flow_server: flownode.flow_worker_manager(),
+            flow_server: flownode.flow_engine(),
        });

        let table_id_sequence = Arc::new(
@@ -600,10 +606,19 @@ impl StartCommand {
        .context(error::StartFrontendSnafu)?;
        let fe_instance = Arc::new(fe_instance);

-        let flow_worker_manager = flownode.flow_worker_manager();
+        // set the frontend client for flownode
+        let grpc_handler = fe_instance.clone() as Arc<dyn GrpcQueryHandlerWithBoxedError>;
+        let weak_grpc_handler = Arc::downgrade(&grpc_handler);
+        frontend_instance_handler
+            .lock()
+            .unwrap()
+            .replace(weak_grpc_handler);
+
+        // set the frontend invoker for flownode
+        let flow_streaming_engine = flownode.flow_engine().streaming_engine();
        // flow server need to be able to use frontend to write insert requests back
        let invoker = FrontendInvoker::build_from(
-            flow_worker_manager.clone(),
+            flow_streaming_engine.clone(),
            catalog_manager.clone(),
            kv_backend.clone(),
            layered_cache_registry.clone(),
@@ -612,7 +627,7 @@ impl StartCommand {
        )
        .await
        .context(error::StartFlownodeSnafu)?;
-        flow_worker_manager.set_frontend_invoker(invoker).await;
+        flow_streaming_engine.set_frontend_invoker(invoker).await;

        let export_metrics_task = ExportMetricsTask::try_new(&opts.export_metrics, Some(&plugins))
            .context(error::ServersSnafu)?;
@@ -688,7 +703,7 @@ pub struct StandaloneInformationExtension {
    region_server: RegionServer,
    procedure_manager: ProcedureManagerRef,
    start_time_ms: u64,
-    flow_worker_manager: RwLock<Option<Arc<FlowWorkerManager>>>,
+    flow_streaming_engine: RwLock<Option<Arc<StreamingEngine>>>,
 }

 impl StandaloneInformationExtension {
@@ -697,14 +712,14 @@ impl StandaloneInformationExtension {
            region_server,
            procedure_manager,
            start_time_ms: common_time::util::current_time_millis() as u64,
-            flow_worker_manager: RwLock::new(None),
+            flow_streaming_engine: RwLock::new(None),
        }
    }

-    /// Set the flow worker manager for the standalone instance.
-    pub async fn set_flow_worker_manager(&self, flow_worker_manager: Arc<FlowWorkerManager>) {
-        let mut guard = self.flow_worker_manager.write().await;
-        *guard = Some(flow_worker_manager);
+    /// Set the flow streaming engine for the standalone instance.
+    pub async fn set_flow_streaming_engine(&self, flow_streaming_engine: Arc<StreamingEngine>) {
+        let mut guard = self.flow_streaming_engine.write().await;
+        *guard = Some(flow_streaming_engine);
    }
 }

@@ -773,6 +788,8 @@ impl InformationExtension for StandaloneInformationExtension {
                    sst_size: region_stat.sst_size,
                    index_size: region_stat.index_size,
                    region_manifest: region_stat.manifest.into(),
+                    data_topic_latest_entry_id: region_stat.data_topic_latest_entry_id,
+                    metadata_topic_latest_entry_id: region_stat.metadata_topic_latest_entry_id,
                }
            })
            .collect::<Vec<_>>();
@@ -781,7 +798,7 @@ impl InformationExtension for StandaloneInformationExtension {

    async fn flow_stats(&self) -> std::result::Result<Option<FlowStat>, Self::Error> {
        Ok(Some(
-            self.flow_worker_manager
+            self.flow_streaming_engine
                .read()
                .await
                .as_ref()
--- a/src/cmd/tests/cli.rs
+++ b/src/cmd/tests/cli.rs
@@ -1,148 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#[cfg(target_os = "macos")]
-mod tests {
-    use std::path::PathBuf;
-    use std::process::{Command, Stdio};
-    use std::time::Duration;
-
-    use common_test_util::temp_dir::create_temp_dir;
-    use rexpect::session::PtyReplSession;
-
-    struct Repl {
-        repl: PtyReplSession,
-    }
-
-    impl Repl {
-        fn send_line(&mut self, line: &str) {
-            let _ = self.repl.send_line(line).unwrap();
-
-            // read a line to consume the prompt
-            let _ = self.read_line();
-        }
-
-        fn read_line(&mut self) -> String {
-            self.repl.read_line().unwrap()
-        }
-
-        fn read_expect(&mut self, expect: &str) {
-            assert_eq!(self.read_line(), expect);
-        }
-
-        fn read_contains(&mut self, pat: &str) {
-            assert!(self.read_line().contains(pat));
-        }
-    }
-
-    // TODO(LFC): Un-ignore this REPL test.
-    // Ignore this REPL test because some logical plans like create database are not supported yet in Datanode.
-    #[ignore]
-    #[test]
-    fn test_repl() {
-        let data_home = create_temp_dir("data");
-        let wal_dir = create_temp_dir("wal");
-
-        let mut bin_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
-        bin_path.push("../../target/debug");
-        let bin_path = bin_path.to_str().unwrap();
-
-        let mut datanode = Command::new("./greptime")
-            .current_dir(bin_path)
-            .args([
-                "datanode",
-                "start",
-                "--rpc-bind-addr=0.0.0.0:4321",
-                "--node-id=1",
-                &format!("--data-home={}", data_home.path().display()),
-                &format!("--wal-dir={}", wal_dir.path().display()),
-            ])
-            .stdout(Stdio::null())
-            .spawn()
-            .unwrap();
-
-        // wait for Datanode actually started
-        std::thread::sleep(Duration::from_secs(3));
-
-        let mut repl_cmd = Command::new("./greptime");
-        let _ = repl_cmd.current_dir(bin_path).args([
-            "--log-level=off",
-            "cli",
-            "attach",
-            "--grpc-bind-addr=0.0.0.0:4321",
-            // history commands can sneaky into stdout and mess up our tests, so disable it
-            "--disable-helper",
-        ]);
-        let pty_session = rexpect::session::spawn_command(repl_cmd, Some(5_000)).unwrap();
-        let repl = PtyReplSession {
-            prompt: "> ".to_string(),
-            pty_session,
-            quit_command: None,
-            echo_on: false,
-        };
-        let repl = &mut Repl { repl };
-        repl.read_expect("Ready for commands. (Hint: try 'help')");
-
-        test_create_database(repl);
-
-        test_use_database(repl);
-
-        test_create_table(repl);
-
-        test_insert(repl);
-
-        test_select(repl);
-
-        datanode.kill().unwrap();
-        let _ = datanode.wait().unwrap();
-    }
-
-    fn test_create_database(repl: &mut Repl) {
-        repl.send_line("CREATE DATABASE db;");
-        repl.read_expect("Affected Rows: 1");
-        repl.read_contains("Cost");
-    }
-
-    fn test_use_database(repl: &mut Repl) {
-        repl.send_line("USE db");
-        repl.read_expect("Total Rows: 0");
-        repl.read_contains("Cost");
-        repl.read_expect("Using db");
-    }
-
-    fn test_create_table(repl: &mut Repl) {
-        repl.send_line("CREATE TABLE t(x STRING, ts TIMESTAMP TIME INDEX);");
-        repl.read_expect("Affected Rows: 0");
-        repl.read_contains("Cost");
-    }
-
-    fn test_insert(repl: &mut Repl) {
-        repl.send_line("INSERT INTO t(x, ts) VALUES ('hello', 1676895812239);");
-        repl.read_expect("Affected Rows: 1");
-        repl.read_contains("Cost");
-    }
-
-    fn test_select(repl: &mut Repl) {
-        repl.send_line("SELECT * FROM t;");
-
-        repl.read_expect("+-------+-------------------------+");
-        repl.read_expect("| x     | ts                      |");
-        repl.read_expect("+-------+-------------------------+");
-        repl.read_expect("| hello | 2023-02-20T12:23:32.239 |");
-        repl.read_expect("+-------+-------------------------+");
-        repl.read_expect("Total Rows: 1");
-
-        repl.read_contains("Cost");
-    }
-}
--- a/src/cmd/tests/load_config_test.rs
+++ b/src/cmd/tests/load_config_test.rs
@@ -74,6 +74,7 @@ fn test_load_datanode_example_config() {
                RegionEngineConfig::File(FileEngineConfig {}),
                RegionEngineConfig::Metric(MetricEngineConfig {
                    experimental_sparse_primary_key_encoding: false,
+                    flush_metadata_region_interval: Duration::from_secs(30),
                }),
            ],
            logging: LoggingOptions {
@@ -216,6 +217,7 @@ fn test_load_standalone_example_config() {
                RegionEngineConfig::File(FileEngineConfig {}),
                RegionEngineConfig::Metric(MetricEngineConfig {
                    experimental_sparse_primary_key_encoding: false,
+                    flush_metadata_region_interval: Duration::from_secs(30),
                }),
            ],
            storage: StorageConfig {
--- a/src/common/base/src/plugins.rs
+++ b/src/common/base/src/plugins.rs
@@ -31,7 +31,8 @@ impl Plugins {
    }

    pub fn insert<T: 'static + Send + Sync>(&self, value: T) {
-        let _ = self.write().insert(value);
+        let last = self.write().insert(value);
+        assert!(last.is_none(), "each type of plugins must be one and only");
    }

    pub fn get<T: 'static + Send + Sync + Clone>(&self) -> Option<T> {
@@ -137,4 +138,12 @@ mod tests {
        assert_eq!(plugins.len(), 2);
        assert!(!plugins.is_empty());
    }
+
+    #[test]
+    #[should_panic(expected = "each type of plugins must be one and only")]
+    fn test_plugin_uniqueness() {
+        let plugins = Plugins::new();
+        plugins.insert(1i32); // first value of type `i32`: accepted
+        plugins.insert(2i32); // second value of the same type: `insert` asserts and panics
+    }
 }
--- a/src/common/error/Cargo.toml
+++ b/src/common/error/Cargo.toml
@@ -12,3 +12,6 @@ http.workspace = true
 snafu.workspace = true
 strum.workspace = true
 tonic.workspace = true
+
+[dev-dependencies]
+common-macro.workspace = true
--- a/src/common/error/src/ext.rs
+++ b/src/common/error/src/ext.rs
@@ -42,7 +42,7 @@ pub trait ErrorExt: StackError {
                if let Some(external_error) = error.source() {
                    let external_root = external_error.sources().last().unwrap();

-                    if error.to_string().is_empty() {
+                    if error.transparent() {
                        format!("{external_root}")
                    } else {
                        format!("{error}: {external_root}")
@@ -86,6 +86,14 @@ pub trait StackError: std::error::Error {
        }
        result
    }
+
+    /// Indicates whether this error is "transparent", i.e. it delegates its "display" and "source"
+    /// to the underlying error. This can be useful when you are just wrapping some external error,
+    /// **AND** cannot or would not provide meaningful contextual info. For example, the
+    /// `DataFusionError`.
+    fn transparent(&self) -> bool {
+        false // default: errors are not transparent unless an implementation overrides this
+    }
 }

 impl<T: ?Sized + StackError> StackError for Arc<T> {
--- a/src/common/error/tests/ext.rs
+++ b/src/common/error/tests/ext.rs
@@ -0,0 +1,115 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::any::Any;
+
+use common_error::ext::{ErrorExt, PlainError, StackError};
+use common_error::status_code::StatusCode;
+use common_macro::stack_trace_debug;
+use snafu::{Location, ResultExt, Snafu};
+
+#[derive(Snafu)]
+#[stack_trace_debug]
+enum MyError {
+    #[snafu(display(r#"A normal error with "display" attribute, message "{}""#, message))]
+    Normal {
+        message: String,
+        #[snafu(source)]
+        error: PlainError,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(transparent)]
+    Transparent {
+        #[snafu(source)]
+        error: PlainError,
+        #[snafu(implicit)]
+        location: Location,
+    },
+}
+
+impl ErrorExt for MyError {
+    fn status_code(&self) -> StatusCode {
+        StatusCode::Unexpected
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+}
+
+fn normal_error() -> Result<(), MyError> { // test helper: always returns `Err(MyError::Normal)`
+    let plain_error = PlainError::new("<root cause>".to_string(), StatusCode::Unexpected);
+    Err(plain_error).context(NormalSnafu { message: "blabla" }) // wrap the root cause with context
+}
+
+fn transparent_error() -> Result<(), MyError> { // test helper: always returns the transparent variant
+    let plain_error = PlainError::new("<root cause>".to_string(), StatusCode::Unexpected);
+    Err(plain_error)? // `?` converts the root cause into `MyError` without adding any context
+}
+
+#[test]
+fn test_output_msg() {
+    let result = normal_error();
+    assert_eq!(
+        result.unwrap_err().output_msg(),
+        r#"A normal error with "display" attribute, message "blabla": <root cause>"#
+    );
+
+    let result = transparent_error();
+    assert_eq!(result.unwrap_err().output_msg(), "<root cause>");
+}
+
+#[test]
+fn test_to_string() {
+    let result = normal_error();
+    assert_eq!(
+        result.unwrap_err().to_string(),
+        r#"A normal error with "display" attribute, message "blabla""#
+    );
+
+    let result = transparent_error();
+    assert_eq!(result.unwrap_err().to_string(), "<root cause>");
+}
+
+#[test]
+fn test_debug_format() { // NOTE: the expected `line:column` values below are positions in this file
+    let result = normal_error();
+    let debug_output = format!("{:?}", result.unwrap_err());
+    let normalized_output = debug_output.replace('\\', "/"); // presumably for Windows path separators
+    assert_eq!(
+        normalized_output,
+        r#"0: A normal error with "display" attribute, message "blabla", at src/common/error/tests/ext.rs:55:22
+1: PlainError { msg: "<root cause>", status_code: Unexpected }"#
+    );
+
+    let result = transparent_error();
+    let debug_output = format!("{:?}", result.unwrap_err());
+    let normalized_output = debug_output.replace('\\', "/");
+    assert_eq!(
+        normalized_output,
+        r#"0: <transparent>, at src/common/error/tests/ext.rs:60:5
+1: PlainError { msg: "<root cause>", status_code: Unexpected }"#
+    ); // 55:22 and 60:5 are the error sites in `normal_error` / `transparent_error`; keep in sync
+}
+
+#[test]
+fn test_transparent_flag() {
+    let result = normal_error();
+    assert!(!result.unwrap_err().transparent());
+
+    let result = transparent_error();
+    assert!(result.unwrap_err().transparent());
+}
--- a/src/common/function/Cargo.toml
+++ b/src/common/function/Cargo.toml
@@ -18,6 +18,7 @@ api.workspace = true
 arc-swap = "1.0"
 async-trait.workspace = true
 bincode = "1.3"
+catalog.workspace = true
 chrono.workspace = true
 common-base.workspace = true
 common-catalog.workspace = true
--- a/src/common/function/src/admin/migrate_region.rs
+++ b/src/common/function/src/admin/migrate_region.rs
@@ -25,12 +25,13 @@ use session::context::QueryContextRef;
 use crate::handlers::ProcedureServiceHandlerRef;
 use crate::helper::cast_u64;

-const DEFAULT_TIMEOUT_SECS: u64 = 30;
+/// The default timeout for migrate region procedure.
+const DEFAULT_TIMEOUT_SECS: u64 = 300;

 /// A function to migrate a region from source peer to target peer.
 /// Returns the submitted procedure id if success. Only available in cluster mode.
 ///
-/// - `migrate_region(region_id, from_peer, to_peer)`, with timeout(30 seconds).
+/// - `migrate_region(region_id, from_peer, to_peer)`, with timeout(300 seconds).
 /// - `migrate_region(region_id, from_peer, to_peer, timeout(secs))`.
 ///
 /// The parameters:
--- a/src/common/function/src/handlers.rs
+++ b/src/common/function/src/handlers.rs
@@ -15,6 +15,7 @@
 use std::sync::Arc;

 use async_trait::async_trait;
+use catalog::CatalogManagerRef;
 use common_base::AffectedRows;
 use common_meta::rpc::procedure::{
    AddRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse,
@@ -72,6 +73,9 @@ pub trait ProcedureServiceHandler: Send + Sync {

    /// Remove a region follower from a region.
    async fn remove_region_follower(&self, request: RemoveRegionFollowerRequest) -> Result<()>;
+
+    /// Get the catalog manager
+    fn catalog_manager(&self) -> &CatalogManagerRef;
 }

 /// This flow service handler is only use for flush flow for now.
--- a/src/common/function/src/scalars/matches_term.rs
+++ b/src/common/function/src/scalars/matches_term.rs
@@ -12,8 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::fmt;
+use std::iter::repeat_n;
 use std::sync::Arc;
-use std::{fmt, iter};

 use common_query::error::{InvalidFuncArgsSnafu, Result};
 use common_query::prelude::Volatility;
@@ -126,9 +127,10 @@ impl Function for MatchesTermFunction {
            let term = term_column.get_ref(0).as_string().unwrap();
            match term {
                None => {
-                    return Ok(Arc::new(BooleanVector::from_iter(
-                        iter::repeat(None).take(text_column.len()),
-                    )));
+                    return Ok(Arc::new(BooleanVector::from_iter(repeat_n(
+                        None,
+                        text_column.len(),
+                    ))));
                }
                Some(term) => Some(MatchesTermFinder::new(term)),
            }
@@ -217,7 +219,7 @@ impl MatchesTermFinder {
        }

        let mut pos = 0;
-        while let Some(found_pos) = self.finder.find(text[pos..].as_bytes()) {
+        while let Some(found_pos) = self.finder.find(&text.as_bytes()[pos..]) {
            let actual_pos = pos + found_pos;

            let prev_ok = self.starts_with_non_alnum
--- a/src/common/function/src/scalars/uddsketch_calc.rs
+++ b/src/common/function/src/scalars/uddsketch_calc.rs
@@ -115,6 +115,13 @@ impl Function for UddSketchCalcFunction {
                }
            };

+            // Check if the sketch is empty, if so, return null
+            // This is important to avoid panics when calling estimate_quantile on an empty sketch
+            // In practice, this will happen if input is all null
+            if sketch.bucket_iter().count() == 0 {
+                builder.push_null();
+                continue;
+            }
            // Compute the estimated quantile from the sketch
            let result = sketch.estimate_quantile(perc);
            builder.push(Some(result));
--- a/src/common/function/src/state.rs
+++ b/src/common/function/src/state.rs
@@ -34,6 +34,7 @@ impl FunctionState {

        use api::v1::meta::ProcedureStatus;
        use async_trait::async_trait;
+        use catalog::CatalogManagerRef;
        use common_base::AffectedRows;
        use common_meta::rpc::procedure::{
            AddRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse,
@@ -80,6 +81,10 @@ impl FunctionState {
            ) -> Result<()> {
                Ok(())
            }
+
+            fn catalog_manager(&self) -> &CatalogManagerRef {
+                unimplemented!()
+            }
        }

        #[async_trait]
--- a/src/common/grpc/Cargo.toml
+++ b/src/common/grpc/Cargo.toml
@@ -23,8 +23,11 @@ flatbuffers = "24"
 hyper.workspace = true
 lazy_static.workspace = true
 prost.workspace = true
+serde.workspace = true
+serde_json.workspace = true
 snafu.workspace = true
 tokio.workspace = true
+tokio-util.workspace = true
 tonic.workspace = true
 tower.workspace = true

--- a/src/common/grpc/src/channel_manager.rs
+++ b/src/common/grpc/src/channel_manager.rs
@@ -22,6 +22,7 @@ use dashmap::mapref::entry::Entry;
 use dashmap::DashMap;
 use lazy_static::lazy_static;
 use snafu::{OptionExt, ResultExt};
+use tokio_util::sync::CancellationToken;
 use tonic::transport::{
    Certificate, Channel as InnerChannel, ClientTlsConfig, Endpoint, Identity, Uri,
 };
@@ -39,18 +40,48 @@ lazy_static! {
    static ref ID: AtomicU64 = AtomicU64::new(0);
 }

-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
 pub struct ChannelManager {
+    inner: Arc<Inner>,
+}
+
+#[derive(Debug)]
+struct Inner {
    id: u64,
    config: ChannelConfig,
    client_tls_config: Option<ClientTlsConfig>,
    pool: Arc<Pool>,
-    channel_recycle_started: Arc<AtomicBool>,
+    channel_recycle_started: AtomicBool,
+    cancel: CancellationToken,
 }

-impl Default for ChannelManager {
+impl Default for Inner {
    fn default() -> Self {
-        ChannelManager::with_config(ChannelConfig::default())
+        Self::with_config(ChannelConfig::default())
+    }
+}
+
+impl Drop for Inner {
+    fn drop(&mut self) {
+        // Cancel the channel recycle task.
+        self.cancel.cancel();
+    }
+}
+
+impl Inner {
+    fn with_config(config: ChannelConfig) -> Self {
+        let id = ID.fetch_add(1, Ordering::Relaxed);
+        let pool = Arc::new(Pool::default());
+        let cancel = CancellationToken::new();
+
+        Self {
+            id,
+            config,
+            client_tls_config: None,
+            pool,
+            channel_recycle_started: AtomicBool::new(false),
+            cancel,
+        }
    }
 }

@@ -60,19 +91,14 @@ impl ChannelManager {
    }

    pub fn with_config(config: ChannelConfig) -> Self {
-        let id = ID.fetch_add(1, Ordering::Relaxed);
-        let pool = Arc::new(Pool::default());
+        let inner = Inner::with_config(config);
        Self {
-            id,
-            config,
-            client_tls_config: None,
-            pool,
-            channel_recycle_started: Arc::new(AtomicBool::new(false)),
+            inner: Arc::new(inner),
        }
    }

    pub fn with_tls_config(config: ChannelConfig) -> Result<Self> {
-        let mut cm = Self::with_config(config.clone());
+        let mut inner = Inner::with_config(config.clone());

        // setup tls
        let path_config = config.client_tls.context(InvalidTlsConfigSnafu {
@@ -88,17 +114,23 @@ impl ChannelManager {
            .context(InvalidConfigFilePathSnafu)?;
        let client_identity = Identity::from_pem(client_cert, client_key);

-        cm.client_tls_config = Some(
+        inner.client_tls_config = Some(
            ClientTlsConfig::new()
                .ca_certificate(server_root_ca_cert)
                .identity(client_identity),
        );

-        Ok(cm)
+        Ok(Self {
+            inner: Arc::new(inner),
+        })
    }

    pub fn config(&self) -> &ChannelConfig {
-        &self.config
+        &self.inner.config
+    }
+
+    fn pool(&self) -> &Arc<Pool> {
+        &self.inner.pool
    }

    pub fn get(&self, addr: impl AsRef<str>) -> Result<InnerChannel> {
@@ -106,12 +138,12 @@ impl ChannelManager {

        let addr = addr.as_ref();
        // It will acquire the read lock.
-        if let Some(inner_ch) = self.pool.get(addr) {
+        if let Some(inner_ch) = self.pool().get(addr) {
            return Ok(inner_ch);
        }

        // It will acquire the write lock.
-        let entry = match self.pool.entry(addr.to_string()) {
+        let entry = match self.pool().entry(addr.to_string()) {
            Entry::Occupied(entry) => {
                entry.get().increase_access();
                entry.into_ref()
@@ -150,7 +182,7 @@ impl ChannelManager {
            access: AtomicUsize::new(1),
            use_default_connector: false,
        };
-        self.pool.put(addr, channel);
+        self.pool().put(addr, channel);

        Ok(inner_channel)
    }
@@ -159,11 +191,11 @@ impl ChannelManager {
    where
        F: FnMut(&String, &mut Channel) -> bool,
    {
-        self.pool.retain_channel(f);
+        self.pool().retain_channel(f);
    }

    fn build_endpoint(&self, addr: &str) -> Result<Endpoint> {
-        let http_prefix = if self.client_tls_config.is_some() {
+        let http_prefix = if self.inner.client_tls_config.is_some() {
            "https"
        } else {
            "http"
@@ -172,51 +204,52 @@ impl ChannelManager {
        let mut endpoint =
            Endpoint::new(format!("{http_prefix}://{addr}")).context(CreateChannelSnafu)?;

-        if let Some(dur) = self.config.timeout {
+        if let Some(dur) = self.config().timeout {
            endpoint = endpoint.timeout(dur);
        }
-        if let Some(dur) = self.config.connect_timeout {
+        if let Some(dur) = self.config().connect_timeout {
            endpoint = endpoint.connect_timeout(dur);
        }
-        if let Some(limit) = self.config.concurrency_limit {
+        if let Some(limit) = self.config().concurrency_limit {
            endpoint = endpoint.concurrency_limit(limit);
        }
-        if let Some((limit, dur)) = self.config.rate_limit {
+        if let Some((limit, dur)) = self.config().rate_limit {
            endpoint = endpoint.rate_limit(limit, dur);
        }
-        if let Some(size) = self.config.initial_stream_window_size {
+        if let Some(size) = self.config().initial_stream_window_size {
            endpoint = endpoint.initial_stream_window_size(size);
        }
-        if let Some(size) = self.config.initial_connection_window_size {
+        if let Some(size) = self.config().initial_connection_window_size {
            endpoint = endpoint.initial_connection_window_size(size);
        }
-        if let Some(dur) = self.config.http2_keep_alive_interval {
+        if let Some(dur) = self.config().http2_keep_alive_interval {
            endpoint = endpoint.http2_keep_alive_interval(dur);
        }
-        if let Some(dur) = self.config.http2_keep_alive_timeout {
+        if let Some(dur) = self.config().http2_keep_alive_timeout {
            endpoint = endpoint.keep_alive_timeout(dur);
        }
-        if let Some(enabled) = self.config.http2_keep_alive_while_idle {
+        if let Some(enabled) = self.config().http2_keep_alive_while_idle {
            endpoint = endpoint.keep_alive_while_idle(enabled);
        }
-        if let Some(enabled) = self.config.http2_adaptive_window {
+        if let Some(enabled) = self.config().http2_adaptive_window {
            endpoint = endpoint.http2_adaptive_window(enabled);
        }
-        if let Some(tls_config) = &self.client_tls_config {
+        if let Some(tls_config) = &self.inner.client_tls_config {
            endpoint = endpoint
                .tls_config(tls_config.clone())
                .context(CreateChannelSnafu)?;
        }

        endpoint = endpoint
-            .tcp_keepalive(self.config.tcp_keepalive)
-            .tcp_nodelay(self.config.tcp_nodelay);
+            .tcp_keepalive(self.config().tcp_keepalive)
+            .tcp_nodelay(self.config().tcp_nodelay);

        Ok(endpoint)
    }

    fn trigger_channel_recycling(&self) {
        if self
+            .inner
            .channel_recycle_started
            .compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed)
            .is_err()
@@ -224,13 +257,15 @@ impl ChannelManager {
            return;
        }

-        let pool = self.pool.clone();
-        let _handle = common_runtime::spawn_global(async {
-            recycle_channel_in_loop(pool, RECYCLE_CHANNEL_INTERVAL_SECS).await;
+        let pool = self.pool().clone();
+        let cancel = self.inner.cancel.clone();
+        let id = self.inner.id;
+        let _handle = common_runtime::spawn_global(async move {
+            recycle_channel_in_loop(pool, id, cancel, RECYCLE_CHANNEL_INTERVAL_SECS).await;
        });
        info!(
            "ChannelManager: {}, channel recycle is started, running in the background!",
-            self.id
+            self.inner.id
        );
    }
 }
@@ -443,11 +478,23 @@ impl Pool {
    }
 }

-async fn recycle_channel_in_loop(pool: Arc<Pool>, interval_secs: u64) {
+async fn recycle_channel_in_loop(
+    pool: Arc<Pool>,
+    id: u64,
+    cancel: CancellationToken,
+    interval_secs: u64,
+) {
    let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));

    loop {
-        let _ = interval.tick().await;
+        tokio::select! {
+            _ = cancel.cancelled() => {
+                info!("Stop channel recycle, ChannelManager id: {}", id);
+                break;
+            },
+            _ = interval.tick() => {}
+        }
+
        pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
    }
 }
@@ -461,11 +508,7 @@ mod tests {
    #[should_panic]
    #[test]
    fn test_invalid_addr() {
-        let pool = Arc::new(Pool::default());
-        let mgr = ChannelManager {
-            pool,
-            ..Default::default()
-        };
+        let mgr = ChannelManager::default();
        let addr = "http://test";

        let _ = mgr.get(addr).unwrap();
@@ -475,7 +518,9 @@ mod tests {
    async fn test_access_count() {
        let mgr = ChannelManager::new();
        // Do not start recycle
-        mgr.channel_recycle_started.store(true, Ordering::Relaxed);
+        mgr.inner
+            .channel_recycle_started
+            .store(true, Ordering::Relaxed);
        let mgr = Arc::new(mgr);
        let addr = "test_uri";

@@ -493,12 +538,12 @@ mod tests {
            join.await.unwrap();
        }

-        assert_eq!(1000, mgr.pool.get_access(addr).unwrap());
+        assert_eq!(1000, mgr.pool().get_access(addr).unwrap());

-        mgr.pool
+        mgr.pool()
            .retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0);

-        assert_eq!(0, mgr.pool.get_access(addr).unwrap());
+        assert_eq!(0, mgr.pool().get_access(addr).unwrap());
    }

    #[test]
@@ -624,4 +669,49 @@ mod tests {
            true
        });
    }
+
+    #[tokio::test]
+    async fn test_pool_release_with_channel_recycle() {
+        let mgr = ChannelManager::new();
+
+        let pool_holder = mgr.pool().clone();
+
+        // start channel recycle task
+        let addr = "test_addr";
+        let _ = mgr.get(addr);
+
+        let mgr_clone_1 = mgr.clone();
+        let mgr_clone_2 = mgr.clone();
+        assert_eq!(3, Arc::strong_count(mgr.pool()));
+
+        drop(mgr_clone_1);
+        drop(mgr_clone_2);
+        assert_eq!(3, Arc::strong_count(mgr.pool()));
+
+        drop(mgr);
+
+        // wait for the channel recycle task to finish
+        tokio::time::sleep(Duration::from_millis(10)).await;
+
+        assert_eq!(1, Arc::strong_count(&pool_holder));
+    }
+
+    #[tokio::test]
+    async fn test_pool_release_without_channel_recycle() {
+        let mgr = ChannelManager::new();
+
+        let pool_holder = mgr.pool().clone();
+
+        let mgr_clone_1 = mgr.clone();
+        let mgr_clone_2 = mgr.clone();
+        assert_eq!(2, Arc::strong_count(mgr.pool()));
+
+        drop(mgr_clone_1);
+        drop(mgr_clone_2);
+        assert_eq!(2, Arc::strong_count(mgr.pool()));
+
+        drop(mgr);
+
+        assert_eq!(1, Arc::strong_count(&pool_holder));
+    }
 }
--- a/src/common/grpc/src/error.rs
+++ b/src/common/grpc/src/error.rs
@@ -97,6 +97,14 @@ pub enum Error {

    #[snafu(display("Not supported: {}", feat))]
    NotSupported { feat: String },
+
+    #[snafu(display("Failed to serde Json"))]
+    SerdeJson {
+        #[snafu(source)]
+        error: serde_json::error::Error,
+        #[snafu(implicit)]
+        location: Location,
+    },
 }

 impl ErrorExt for Error {
@@ -110,7 +118,8 @@ impl ErrorExt for Error {

            Error::CreateChannel { .. }
            | Error::Conversion { .. }
-            | Error::DecodeFlightData { .. } => StatusCode::Internal,
+            | Error::DecodeFlightData { .. }
+            | Error::SerdeJson { .. } => StatusCode::Internal,

            Error::CreateRecordBatch { source, .. } => source.status_code(),
            Error::ConvertArrowSchema { source, .. } => source.status_code(),
--- a/src/common/grpc/src/flight.rs
+++ b/src/common/grpc/src/flight.rs
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+pub mod do_put;
+
 use std::collections::HashMap;
 use std::sync::Arc;

--- a/src/common/grpc/src/flight/do_put.rs
+++ b/src/common/grpc/src/flight/do_put.rs
@@ -0,0 +1,93 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use arrow_flight::PutResult;
+use common_base::AffectedRows;
+use serde::{Deserialize, Serialize};
+use snafu::ResultExt;
+
+use crate::error::{Error, SerdeJsonSnafu};
+
+/// The metadata for "DoPut" requests and responses.
+///
+/// Currently, there's only a "request_id", for coordinating requests and responses in the streams.
+/// Client can set a unique request id in this metadata, and the server will return the same id in
+/// the corresponding response. In doing so, a client can know how to deal with its pending requests.
+#[derive(Serialize, Deserialize)]
+pub struct DoPutMetadata {
+    request_id: i64,
+}
+
+impl DoPutMetadata {
+    pub fn new(request_id: i64) -> Self {
+        Self { request_id }
+    }
+
+    pub fn request_id(&self) -> i64 {
+        self.request_id
+    }
+}
+
+/// The response in the "DoPut" returned stream.
+#[derive(Serialize, Deserialize)]
+pub struct DoPutResponse {
+    /// The same "request_id" in the request; see the [DoPutMetadata].
+    request_id: i64,
+    /// The number of successfully ingested rows.
+    affected_rows: AffectedRows,
+}
+
+impl DoPutResponse {
+    pub fn new(request_id: i64, affected_rows: AffectedRows) -> Self {
+        Self {
+            request_id,
+            affected_rows,
+        }
+    }
+
+    pub fn request_id(&self) -> i64 {
+        self.request_id
+    }
+
+    pub fn affected_rows(&self) -> AffectedRows {
+        self.affected_rows
+    }
+}
+
+impl TryFrom<PutResult> for DoPutResponse {
+    type Error = Error;
+
+    fn try_from(value: PutResult) -> Result<Self, Self::Error> {
+        serde_json::from_slice(&value.app_metadata).context(SerdeJsonSnafu)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_serde_do_put_metadata() {
+        let serialized = r#"{"request_id":42}"#;
+        let metadata = serde_json::from_str::<DoPutMetadata>(serialized).unwrap();
+        assert_eq!(metadata.request_id(), 42);
+    }
+
+    #[test]
+    fn test_serde_do_put_response() {
+        let x = DoPutResponse::new(42, 88);
+        let serialized = serde_json::to_string(&x).unwrap();
+        assert_eq!(serialized, r#"{"request_id":42,"affected_rows":88}"#);
+    }
+}
--- a/src/common/macro/src/stack_trace_debug.rs
+++ b/src/common/macro/src/stack_trace_debug.rs
@@ -14,7 +14,7 @@

 //! implement `::common_error::ext::StackError`

-use proc_macro2::{Span, TokenStream as TokenStream2};
+use proc_macro2::{Literal, Span, TokenStream as TokenStream2, TokenTree};
 use quote::{quote, quote_spanned};
 use syn::spanned::Spanned;
 use syn::{parenthesized, Attribute, Ident, ItemEnum, Variant};
@@ -32,6 +32,7 @@ pub fn stack_trace_style_impl(args: TokenStream2, input: TokenStream2) -> TokenS
        variants.push(variant);
    }

+    let transparent_fn = build_transparent_fn(enum_name.clone(), &variants);
    let debug_fmt_fn = build_debug_fmt_impl(enum_name.clone(), variants.clone());
    let next_fn = build_next_impl(enum_name.clone(), variants);
    let debug_impl = build_debug_impl(enum_name.clone());
@@ -43,6 +44,7 @@ pub fn stack_trace_style_impl(args: TokenStream2, input: TokenStream2) -> TokenS
        impl ::common_error::ext::StackError for #enum_name {
            #debug_fmt_fn
            #next_fn
+            #transparent_fn
        }

        #debug_impl
@@ -115,6 +117,7 @@ struct ErrorVariant {
    has_source: bool,
    has_external_cause: bool,
    display: TokenStream2,
+    transparent: bool,
    span: Span,
    cfg_attr: Option<Attribute>,
 }
@@ -140,6 +143,7 @@ impl ErrorVariant {
        }

        let mut display = None;
+        let mut transparent = false;
        let mut cfg_attr = None;
        for attr in variant.attrs {
            if attr.path().is_ident("snafu") {
@@ -150,17 +154,29 @@ impl ErrorVariant {
                        let display_ts: TokenStream2 = content.parse()?;
                        display = Some(display_ts);
                        Ok(())
+                    } else if meta.path.is_ident("transparent") {
+                        display = Some(TokenStream2::from(TokenTree::Literal(Literal::string(
+                            "<transparent>",
+                        ))));
+                        transparent = true;
+                        Ok(())
                    } else {
                        Err(meta.error("unrecognized repr"))
                    }
                })
-                .expect("Each error should contains a display attribute");
+                .unwrap_or_else(|e| panic!("{e}"));
            }

            if attr.path().is_ident("cfg") {
                cfg_attr = Some(attr);
            }
        }
+        let display = display.unwrap_or_else(|| {
+            panic!(
+                r#"Error "{}" must be annotated with attribute "display" or "transparent"."#,
+                variant.ident,
+            )
+        });

        let field_ident = variant
            .fields
@@ -174,7 +190,8 @@ impl ErrorVariant {
            has_location,
            has_source,
            has_external_cause,
-            display: display.unwrap(),
+            display,
+            transparent,
            span,
            cfg_attr,
        }
@@ -275,4 +292,44 @@ impl ErrorVariant {
            }
        }
    }
+
+    fn build_transparent_match_arm(&self) -> TokenStream2 {
+        let cfg = if let Some(cfg) = &self.cfg_attr {
+            quote_spanned!(cfg.span() => #cfg)
+        } else {
+            quote! {}
+        };
+        let name = &self.name;
+        let fields = &self.fields;
+
+        if self.transparent {
+            quote_spanned! {
+                self.span => #cfg #[allow(unused_variables)] #name { #(#fields),* } => {
+                    true
+                },
+            }
+        } else {
+            quote_spanned! {
+                self.span => #cfg #[allow(unused_variables)] #name { #(#fields),* } =>{
+                    false
+                }
+            }
+        }
+    }
+}
+
+fn build_transparent_fn(enum_name: Ident, variants: &[ErrorVariant]) -> TokenStream2 {
+    let match_arms = variants
+        .iter()
+        .map(|v| v.build_transparent_match_arm())
+        .collect::<Vec<_>>();
+
+    quote! {
+        fn transparent(&self) -> bool {
+            use #enum_name::*;
+            match self {
+                #(#match_arms)*
+            }
+        }
+    }
 }
--- a/src/common/meta/src/cache/flow/table_flownode.rs
+++ b/src/common/meta/src/cache/flow/table_flownode.rs
@@ -187,6 +187,7 @@ mod tests {
                    },
                    flownode_ids: BTreeMap::from([(0, 1), (1, 2), (2, 3)]),
                    catalog_name: DEFAULT_CATALOG_NAME.to_string(),
+                    query_context: None,
                    flow_name: "my_flow".to_string(),
                    raw_sql: "sql".to_string(),
                    expire_after: Some(300),
--- a/src/common/meta/src/datanode.rs
+++ b/src/common/meta/src/datanode.rs
@@ -94,6 +94,13 @@ pub struct RegionStat {
    pub index_size: u64,
    /// The manifest infoof the region.
    pub region_manifest: RegionManifestInfo,
+    /// The latest entry id of topic used by data.
+    /// **Only used by remote WAL prune.**
+    pub data_topic_latest_entry_id: u64,
+    /// The latest entry id of topic used by metadata.
+    /// **Only used by remote WAL prune.**
+    /// In mito engine, this is the same as `data_topic_latest_entry_id`.
+    pub metadata_topic_latest_entry_id: u64,
 }

 #[derive(Debug, Clone, Copy, Serialize, Deserialize)]
@@ -142,6 +149,43 @@ impl Stat {
        self.wcus = self.region_stats.iter().map(|s| s.wcus).sum();
        self.region_num = self.region_stats.len() as u64;
    }
+
+    pub fn memory_size(&self) -> usize {
+        // timestamp_millis, rcus, wcus
+        std::mem::size_of::<i64>() * 3 +
+        // id, region_num, node_epoch
+        std::mem::size_of::<u64>() * 3 +
+        // addr
+        std::mem::size_of::<String>() + self.addr.capacity() +
+        // region_stats
+        self.region_stats.iter().map(|s| s.memory_size()).sum::<usize>()
+    }
+}
+
+impl RegionStat {
+    pub fn memory_size(&self) -> usize {
+        // role
+        std::mem::size_of::<RegionRole>() +
+        // id
+        std::mem::size_of::<RegionId>() +
+        // rcus, wcus, approximate_bytes, num_rows
+        std::mem::size_of::<i64>() * 4 +
+        // memtable_size, manifest_size, sst_size, index_size
+        std::mem::size_of::<u64>() * 4 +
+        // engine
+        std::mem::size_of::<String>() + self.engine.capacity() +
+        // region_manifest
+        self.region_manifest.memory_size()
+    }
+}
+
+impl RegionManifestInfo {
+    pub fn memory_size(&self) -> usize {
+        match self {
+            RegionManifestInfo::Mito { .. } => std::mem::size_of::<u64>() * 2,
+            RegionManifestInfo::Metric { .. } => std::mem::size_of::<u64>() * 4,
+        }
+    }
 }

 impl TryFrom<&HeartbeatRequest> for Stat {
@@ -227,6 +271,8 @@ impl From<&api::v1::meta::RegionStat> for RegionStat {
            sst_size: region_stat.sst_size,
            index_size: region_stat.index_size,
            region_manifest: region_stat.manifest.into(),
+            data_topic_latest_entry_id: region_stat.data_topic_latest_entry_id,
+            metadata_topic_latest_entry_id: region_stat.metadata_topic_latest_entry_id,
        }
    }
 }
--- a/src/common/meta/src/ddl/alter_logical_tables.rs
+++ b/src/common/meta/src/ddl/alter_logical_tables.rs
@@ -18,10 +18,12 @@ mod region_request;
 mod table_cache_keys;
 mod update_metadata;

+use api::region::RegionResponse;
 use async_trait::async_trait;
+use common_catalog::format_full_table_name;
 use common_procedure::error::{FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu};
 use common_procedure::{Context, LockKey, Procedure, Status};
-use common_telemetry::{info, warn};
+use common_telemetry::{error, info, warn};
 use futures_util::future;
 use serde::{Deserialize, Serialize};
 use snafu::{ensure, ResultExt};
@@ -30,7 +32,7 @@ use store_api::metric_engine_consts::ALTER_PHYSICAL_EXTENSION_KEY;
 use strum::AsRefStr;
 use table::metadata::TableId;

-use crate::ddl::utils::add_peer_context_if_needed;
+use crate::ddl::utils::{add_peer_context_if_needed, sync_follower_regions};
 use crate::ddl::DdlContext;
 use crate::error::{DecodeJsonSnafu, Error, MetadataCorruptionSnafu, Result};
 use crate::key::table_info::TableInfoValue;
@@ -39,7 +41,7 @@ use crate::key::DeserializedValueWithBytes;
 use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
 use crate::metrics;
 use crate::rpc::ddl::AlterTableTask;
-use crate::rpc::router::find_leaders;
+use crate::rpc::router::{find_leaders, RegionRoute};

 pub struct AlterLogicalTablesProcedure {
    pub context: DdlContext,
@@ -125,14 +127,20 @@ impl AlterLogicalTablesProcedure {
            });
        }

-        // Collects responses from datanodes.
-        let phy_raw_schemas = future::join_all(alter_region_tasks)
+        let mut results = future::join_all(alter_region_tasks)
            .await
            .into_iter()
-            .map(|res| res.map(|mut res| res.extensions.remove(ALTER_PHYSICAL_EXTENSION_KEY)))
            .collect::<Result<Vec<_>>>()?;

+        // Collects responses from datanodes.
+        let phy_raw_schemas = results
+            .iter_mut()
+            .map(|res| res.extensions.remove(ALTER_PHYSICAL_EXTENSION_KEY))
+            .collect::<Vec<_>>();
+
        if phy_raw_schemas.is_empty() {
+            self.submit_sync_region_requests(results, &physical_table_route.region_routes)
+                .await;
            self.data.state = AlterTablesState::UpdateMetadata;
            return Ok(Status::executing(true));
        }
@@ -155,10 +163,34 @@ impl AlterLogicalTablesProcedure {
            warn!("altering logical table result doesn't contains extension key `{ALTER_PHYSICAL_EXTENSION_KEY}`,leaving the physical table's schema unchanged");
        }

+        self.submit_sync_region_requests(results, &physical_table_route.region_routes)
+            .await;
        self.data.state = AlterTablesState::UpdateMetadata;
        Ok(Status::executing(true))
    }

+    async fn submit_sync_region_requests(
+        &self,
+        results: Vec<RegionResponse>,
+        region_routes: &[RegionRoute],
+    ) {
+        let table_info = &self.data.physical_table_info.as_ref().unwrap().table_info;
+        if let Err(err) = sync_follower_regions(
+            &self.context,
+            self.data.physical_table_id,
+            results,
+            region_routes,
+            table_info.meta.engine.as_str(),
+        )
+        .await
+        {
+            error!(err; "Failed to sync regions for table {}, table_id: {}",
+                        format_full_table_name(&table_info.catalog_name, &table_info.schema_name, &table_info.name),
+                        self.data.physical_table_id
+            );
+        }
+    }
+
    pub(crate) async fn on_update_metadata(&mut self) -> Result<Status> {
        self.update_physical_table_metadata().await?;
        self.update_logical_tables_metadata().await?;
--- a/src/common/meta/src/ddl/alter_table.rs
+++ b/src/common/meta/src/ddl/alter_table.rs
@@ -19,6 +19,7 @@ mod update_metadata;

 use std::vec;

+use api::region::RegionResponse;
 use api::v1::alter_table_expr::Kind;
 use api::v1::RenameTable;
 use async_trait::async_trait;
@@ -29,7 +30,7 @@ use common_procedure::{
    PoisonKeys, Procedure, ProcedureId, Status, StringKey,
 };
 use common_telemetry::{debug, error, info};
-use futures::future;
+use futures::future::{self};
 use serde::{Deserialize, Serialize};
 use snafu::{ensure, ResultExt};
 use store_api::storage::RegionId;
@@ -38,7 +39,9 @@ use table::metadata::{RawTableInfo, TableId, TableInfo};
 use table::table_reference::TableReference;

 use crate::cache_invalidator::Context;
-use crate::ddl::utils::{add_peer_context_if_needed, handle_multiple_results, MultipleResults};
+use crate::ddl::utils::{
+    add_peer_context_if_needed, handle_multiple_results, sync_follower_regions, MultipleResults,
+};
 use crate::ddl::DdlContext;
 use crate::error::{AbortProcedureSnafu, Error, NoLeaderSnafu, PutPoisonSnafu, Result};
 use crate::instruction::CacheIdent;
@@ -48,7 +51,7 @@ use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
 use crate::metrics;
 use crate::poison_key::table_poison_key;
 use crate::rpc::ddl::AlterTableTask;
-use crate::rpc::router::{find_leader_regions, find_leaders, region_distribution};
+use crate::rpc::router::{find_leader_regions, find_leaders, region_distribution, RegionRoute};

 /// The alter table procedure
 pub struct AlterTableProcedure {
@@ -194,7 +197,9 @@ impl AlterTableProcedure {
                // Just returns the error, and wait for the next try.
                Err(error)
            }
-            MultipleResults::Ok => {
+            MultipleResults::Ok(results) => {
+                self.submit_sync_region_requests(results, &physical_table_route.region_routes)
+                    .await;
                self.data.state = AlterTableState::UpdateMetadata;
                Ok(Status::executing_with_clean_poisons(true))
            }
@@ -211,6 +216,26 @@ impl AlterTableProcedure {
        }
    }

+    async fn submit_sync_region_requests(
+        &mut self,
+        results: Vec<RegionResponse>,
+        region_routes: &[RegionRoute],
+    ) {
+        // Safety: filled in `prepare` step.
+        let table_info = self.data.table_info().unwrap();
+        if let Err(err) = sync_follower_regions(
+            &self.context,
+            self.data.table_id(),
+            results,
+            region_routes,
+            table_info.meta.engine.as_str(),
+        )
+        .await
+        {
+            error!(err; "Failed to sync regions for table {}, table_id: {}", self.data.table_ref(), self.data.table_id());
+        }
+    }
+
    /// Update table metadata.
    pub(crate) async fn on_update_metadata(&mut self) -> Result<Status> {
        let table_id = self.data.table_id();
--- a/src/common/meta/src/ddl/create_flow.rs
+++ b/src/common/meta/src/ddl/create_flow.rs
@@ -38,7 +38,7 @@ use table::metadata::TableId;
 use crate::cache_invalidator::Context;
 use crate::ddl::utils::{add_peer_context_if_needed, handle_retry_error};
 use crate::ddl::DdlContext;
-use crate::error::{self, Result};
+use crate::error::{self, Result, UnexpectedSnafu};
 use crate::instruction::{CacheIdent, CreateFlow};
 use crate::key::flow::flow_info::FlowInfoValue;
 use crate::key::flow::flow_route::FlowRouteValue;
@@ -171,7 +171,7 @@ impl CreateFlowProcedure {
        }
        self.data.state = CreateFlowState::CreateFlows;
        // determine flow type
-        self.data.flow_type = Some(determine_flow_type(&self.data.task));
+        self.data.flow_type = Some(get_flow_type_from_options(&self.data.task)?);

        Ok(Status::executing(true))
    }
@@ -196,8 +196,8 @@ impl CreateFlowProcedure {
            });
        }
        info!(
-            "Creating flow({:?}) on flownodes with peers={:?}",
-            self.data.flow_id, self.data.peers
+            "Creating flow({:?}, type={:?}) on flownodes with peers={:?}",
+            self.data.flow_id, self.data.flow_type, self.data.peers
        );
        join_all(create_flow)
            .await
@@ -306,8 +306,20 @@ impl Procedure for CreateFlowProcedure {
    }
 }

-pub fn determine_flow_type(_flow_task: &CreateFlowTask) -> FlowType {
-    FlowType::Batching
+pub fn get_flow_type_from_options(flow_task: &CreateFlowTask) -> Result<FlowType> {
+    let flow_type = flow_task
+        .flow_options
+        .get(FlowType::FLOW_TYPE_KEY)
+        .map(|s| s.as_str());
+    match flow_type {
+        Some(FlowType::BATCHING) => Ok(FlowType::Batching),
+        Some(FlowType::STREAMING) => Ok(FlowType::Streaming),
+        Some(unknown) => UnexpectedSnafu {
+            err_msg: format!("Unknown flow type: {}", unknown),
+        }
+        .fail(),
+        None => Ok(FlowType::Batching),
+    }
 }

 /// The state of [CreateFlowProcedure].
@@ -324,7 +336,7 @@ pub enum CreateFlowState {
 }

 /// The type of flow.
-#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub enum FlowType {
    /// The flow is a batching task.
    Batching,
@@ -437,6 +449,7 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
            sink_table_name,
            flownode_ids,
            catalog_name,
+            query_context: Some(value.query_context.clone()),
            flow_name,
            raw_sql: sql,
            expire_after,
--- a/src/common/meta/src/ddl/create_logical_tables.rs
+++ b/src/common/meta/src/ddl/create_logical_tables.rs
@@ -17,12 +17,14 @@ mod metadata;
 mod region_request;
 mod update_metadata;

+use api::region::RegionResponse;
 use api::v1::CreateTableExpr;
 use async_trait::async_trait;
+use common_catalog::consts::METRIC_ENGINE;
 use common_procedure::error::{FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu};
 use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status};
-use common_telemetry::{debug, warn};
-use futures_util::future::join_all;
+use common_telemetry::{debug, error, warn};
+use futures::future;
 use serde::{Deserialize, Serialize};
 use snafu::{ensure, ResultExt};
 use store_api::metadata::ColumnMetadata;
@@ -31,7 +33,7 @@ use store_api::storage::{RegionId, RegionNumber};
 use strum::AsRefStr;
 use table::metadata::{RawTableInfo, TableId};

-use crate::ddl::utils::{add_peer_context_if_needed, handle_retry_error};
+use crate::ddl::utils::{add_peer_context_if_needed, handle_retry_error, sync_follower_regions};
 use crate::ddl::DdlContext;
 use crate::error::{DecodeJsonSnafu, MetadataCorruptionSnafu, Result};
 use crate::key::table_route::TableRouteValue;
@@ -156,14 +158,20 @@ impl CreateLogicalTablesProcedure {
            });
        }

-        // Collects response from datanodes.
-        let phy_raw_schemas = join_all(create_region_tasks)
+        let mut results = future::join_all(create_region_tasks)
            .await
            .into_iter()
-            .map(|res| res.map(|mut res| res.extensions.remove(ALTER_PHYSICAL_EXTENSION_KEY)))
            .collect::<Result<Vec<_>>>()?;

+        // Extracts the `ALTER_PHYSICAL_EXTENSION_KEY` extension from each datanode response.
+        let phy_raw_schemas = results
+            .iter_mut()
+            .map(|res| res.extensions.remove(ALTER_PHYSICAL_EXTENSION_KEY))
+            .collect::<Vec<_>>();
+
        if phy_raw_schemas.is_empty() {
+            self.submit_sync_region_requests(results, region_routes)
+                .await;
            self.data.state = CreateTablesState::CreateMetadata;
            return Ok(Status::executing(false));
        }
@@ -186,10 +194,30 @@ impl CreateLogicalTablesProcedure {
            warn!("creating logical table result doesn't contains extension key `{ALTER_PHYSICAL_EXTENSION_KEY}`,leaving the physical table's schema unchanged");
        }

+        self.submit_sync_region_requests(results, region_routes)
+            .await;
        self.data.state = CreateTablesState::CreateMetadata;

        Ok(Status::executing(true))
    }
+
+    async fn submit_sync_region_requests(
+        &self,
+        results: Vec<RegionResponse>,
+        region_routes: &[RegionRoute],
+    ) {
+        if let Err(err) = sync_follower_regions(
+            &self.context,
+            self.data.physical_table_id,
+            results,
+            region_routes,
+            METRIC_ENGINE,
+        )
+        .await
+        {
+            error!(err; "Failed to sync regions for physical table_id: {}",self.data.physical_table_id);
+        }
+    }
 }

 #[async_trait]
--- a/src/common/meta/src/ddl/drop_table/executor.rs
+++ b/src/common/meta/src/ddl/drop_table/executor.rs
@@ -15,12 +15,13 @@
 use std::collections::HashMap;

 use api::v1::region::{
-    region_request, DropRequest as PbDropRegionRequest, RegionRequest, RegionRequestHeader,
+    region_request, CloseRequest as PbCloseRegionRequest, DropRequest as PbDropRegionRequest,
+    RegionRequest, RegionRequestHeader,
 };
 use common_error::ext::ErrorExt;
 use common_error::status_code::StatusCode;
-use common_telemetry::debug;
 use common_telemetry::tracing_context::TracingContext;
+use common_telemetry::{debug, error};
 use common_wal::options::WalOptions;
 use futures::future::join_all;
 use snafu::ensure;
@@ -36,7 +37,8 @@ use crate::instruction::CacheIdent;
 use crate::key::table_name::TableNameKey;
 use crate::key::table_route::TableRouteValue;
 use crate::rpc::router::{
-    find_leader_regions, find_leaders, operating_leader_regions, RegionRoute,
+    find_follower_regions, find_followers, find_leader_regions, find_leaders,
+    operating_leader_regions, RegionRoute,
 };

 /// [Control] indicated to the caller whether to go to the next step.
@@ -210,10 +212,10 @@ impl DropTableExecutor {
        region_routes: &[RegionRoute],
        fast_path: bool,
    ) -> Result<()> {
+        // Drops leader regions on datanodes.
        let leaders = find_leaders(region_routes);
        let mut drop_region_tasks = Vec::with_capacity(leaders.len());
        let table_id = self.table_id;
-
        for datanode in leaders {
            let requester = ctx.node_manager.datanode(&datanode).await;
            let regions = find_leader_regions(region_routes, &datanode);
@@ -252,6 +254,53 @@ impl DropTableExecutor {
            .into_iter()
            .collect::<Result<Vec<_>>>()?;

+        // Closes follower regions on datanodes.
+        let followers = find_followers(region_routes);
+        let mut close_region_tasks = Vec::with_capacity(followers.len());
+        for datanode in followers {
+            let requester = ctx.node_manager.datanode(&datanode).await;
+            let regions = find_follower_regions(region_routes, &datanode);
+            let region_ids = regions
+                .iter()
+                .map(|region_number| RegionId::new(table_id, *region_number))
+                .collect::<Vec<_>>();
+
+            for region_id in region_ids {
+                debug!("Closing region {region_id} on Datanode {datanode:?}");
+                let request = RegionRequest {
+                    header: Some(RegionRequestHeader {
+                        tracing_context: TracingContext::from_current_span().to_w3c(),
+                        ..Default::default()
+                    }),
+                    body: Some(region_request::Body::Close(PbCloseRegionRequest {
+                        region_id: region_id.as_u64(),
+                    })),
+                };
+
+                let datanode = datanode.clone();
+                let requester = requester.clone();
+                close_region_tasks.push(async move {
+                    if let Err(err) = requester.handle(request).await {
+                        if err.status_code() != StatusCode::RegionNotFound {
+                            return Err(add_peer_context_if_needed(datanode)(err));
+                        }
+                    }
+                    Ok(())
+                });
+            }
+        }
+
+        // Failure to close follower regions is not critical.
+        // When a leader region is dropped, follower regions will be unable to renew their leases via metasrv.
+        // Eventually, these follower regions will be automatically closed by the region livekeeper.
+        if let Err(err) = join_all(close_region_tasks)
+            .await
+            .into_iter()
+            .collect::<Result<Vec<_>>>()
+        {
+            error!(err; "Failed to close follower regions on datanodes, table_id: {}", table_id);
+        }
+
        // Deletes the leader region from registry.
        let region_ids = operating_leader_regions(region_routes);
        ctx.leader_region_registry
--- a/src/common/meta/src/ddl/test_util/create_table.rs
+++ b/src/common/meta/src/ddl/test_util/create_table.rs
@@ -18,7 +18,9 @@ use api::v1::column_def::try_as_column_schema;
 use api::v1::meta::Partition;
 use api::v1::{ColumnDataType, ColumnDef, CreateTableExpr, SemanticType};
 use chrono::DateTime;
-use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO2_ENGINE};
+use common_catalog::consts::{
+    DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO2_ENGINE, MITO_ENGINE,
+};
 use datatypes::schema::RawSchema;
 use derive_builder::Builder;
 use store_api::storage::TableId;
@@ -164,6 +166,7 @@ pub fn test_create_table_task(name: &str, table_id: TableId) -> CreateTableTask
        .time_index("ts")
        .primary_keys(["host".into()])
        .table_name(name)
+        .engine(MITO_ENGINE)
        .build()
        .unwrap()
        .into();
--- a/src/common/meta/src/ddl/test_util/datanode_handler.rs
+++ b/src/common/meta/src/ddl/test_util/datanode_handler.rs
@@ -45,14 +45,41 @@ impl MockDatanodeHandler for () {
 }

 #[derive(Clone)]
-pub struct DatanodeWatcher(pub mpsc::Sender<(Peer, RegionRequest)>);
+pub struct DatanodeWatcher {
+    sender: mpsc::Sender<(Peer, RegionRequest)>,
+    handler: Option<fn(Peer, RegionRequest) -> Result<RegionResponse>>,
+}
+
+impl DatanodeWatcher {
+    pub fn new(sender: mpsc::Sender<(Peer, RegionRequest)>) -> Self {
+        Self {
+            sender,
+            handler: None,
+        }
+    }
+
+    pub fn with_handler(
+        mut self,
+        user_handler: fn(Peer, RegionRequest) -> Result<RegionResponse>,
+    ) -> Self {
+        self.handler = Some(user_handler);
+        self
+    }
+}

 #[async_trait::async_trait]
 impl MockDatanodeHandler for DatanodeWatcher {
    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<RegionResponse> {
        debug!("Returning Ok(0) for request: {request:?}, peer: {peer:?}");
-        self.0.send((peer.clone(), request)).await.unwrap();
-        Ok(RegionResponse::new(0))
+        self.sender
+            .send((peer.clone(), request.clone()))
+            .await
+            .unwrap();
+        if let Some(handler) = self.handler {
+            handler(peer.clone(), request)
+        } else {
+            Ok(RegionResponse::new(0))
+        }
    }

    async fn handle_query(
--- a/src/common/meta/src/ddl/tests/alter_logical_tables.rs
+++ b/src/common/meta/src/ddl/tests/alter_logical_tables.rs
@@ -15,19 +15,33 @@
 use std::assert_matches::assert_matches;
 use std::sync::Arc;

+use api::region::RegionResponse;
+use api::v1::meta::Peer;
+use api::v1::region::sync_request::ManifestInfo;
+use api::v1::region::{region_request, MetricManifestInfo, RegionRequest, SyncRequest};
 use api::v1::{ColumnDataType, SemanticType};
 use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use common_procedure::{Procedure, ProcedureId, Status};
 use common_procedure_test::MockContextProvider;
+use store_api::metric_engine_consts::MANIFEST_INFO_EXTENSION_KEY;
+use store_api::region_engine::RegionManifestInfo;
+use store_api::storage::RegionId;
+use tokio::sync::mpsc;

 use crate::ddl::alter_logical_tables::AlterLogicalTablesProcedure;
 use crate::ddl::test_util::alter_table::TestAlterTableExprBuilder;
 use crate::ddl::test_util::columns::TestColumnDefBuilder;
-use crate::ddl::test_util::datanode_handler::NaiveDatanodeHandler;
-use crate::ddl::test_util::{create_logical_table, create_physical_table};
+use crate::ddl::test_util::datanode_handler::{DatanodeWatcher, NaiveDatanodeHandler};
+use crate::ddl::test_util::{
+    create_logical_table, create_physical_table, create_physical_table_metadata,
+    test_create_physical_table_task,
+};
 use crate::error::Error::{AlterLogicalTablesInvalidArguments, TableNotFound};
+use crate::error::Result;
 use crate::key::table_name::TableNameKey;
+use crate::key::table_route::{PhysicalTableRouteValue, TableRouteValue};
 use crate::rpc::ddl::AlterTableTask;
+use crate::rpc::router::{Region, RegionRoute};
 use crate::test_util::{new_ddl_context, MockDatanodeManager};

 fn make_alter_logical_table_add_column_task(
@@ -407,3 +421,78 @@ async fn test_on_part_duplicate_alter_request() {
        ]
    );
 }
+
+fn alters_request_handler(_peer: Peer, request: RegionRequest) -> Result<RegionResponse> {
+    if let region_request::Body::Alters(_) = request.body.unwrap() {
+        let mut response = RegionResponse::new(0);
+        // Default region id for physical table.
+        let region_id = RegionId::new(1000, 1);
+        response.extensions.insert(
+            MANIFEST_INFO_EXTENSION_KEY.to_string(),
+            RegionManifestInfo::encode_list(&[(region_id, RegionManifestInfo::metric(1, 0, 2, 0))])
+                .unwrap(),
+        );
+        return Ok(response);
+    }
+
+    Ok(RegionResponse::new(0))
+}
+
+#[tokio::test]
+async fn test_on_submit_alter_region_request() {
+    common_telemetry::init_default_ut_logging();
+    let (tx, mut rx) = mpsc::channel(8);
+    let handler = DatanodeWatcher::new(tx).with_handler(alters_request_handler);
+    let node_manager = Arc::new(MockDatanodeManager::new(handler));
+    let ddl_context = new_ddl_context(node_manager);
+
+    let mut create_physical_table_task = test_create_physical_table_task("phy");
+    let phy_id = 1000u32;
+    let region_routes = vec![RegionRoute {
+        region: Region::new_test(RegionId::new(phy_id, 1)),
+        leader_peer: Some(Peer::empty(1)),
+        follower_peers: vec![Peer::empty(5)],
+        leader_state: None,
+        leader_down_since: None,
+    }];
+    create_physical_table_task.set_table_id(phy_id);
+    create_physical_table_metadata(
+        &ddl_context,
+        create_physical_table_task.table_info.clone(),
+        TableRouteValue::Physical(PhysicalTableRouteValue::new(region_routes)),
+    )
+    .await;
+    create_logical_table(ddl_context.clone(), phy_id, "table1").await;
+    create_logical_table(ddl_context.clone(), phy_id, "table2").await;
+
+    let tasks = vec![
+        make_alter_logical_table_add_column_task(None, "table1", vec!["new_col".to_string()]),
+        make_alter_logical_table_add_column_task(None, "table2", vec!["mew_col".to_string()]),
+    ];
+
+    let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context);
+    procedure.on_prepare().await.unwrap();
+    procedure.on_submit_alter_region_requests().await.unwrap();
+    let mut results = Vec::new();
+    for _ in 0..2 {
+        let result = rx.try_recv().unwrap();
+        results.push(result);
+    }
+    rx.try_recv().unwrap_err();
+    let (peer, request) = results.remove(0);
+    assert_eq!(peer.id, 1);
+    assert_matches!(request.body.unwrap(), region_request::Body::Alters(_));
+    let (peer, request) = results.remove(0);
+    assert_eq!(peer.id, 5);
+    assert_matches!(
+        request.body.unwrap(),
+        region_request::Body::Sync(SyncRequest {
+            manifest_info: Some(ManifestInfo::MetricManifestInfo(MetricManifestInfo {
+                data_manifest_version: 1,
+                metadata_manifest_version: 2,
+                ..
+            })),
+            ..
+        })
+    );
+}
--- a/src/common/meta/src/ddl/tests/alter_table.rs
+++ b/src/common/meta/src/ddl/tests/alter_table.rs
@@ -16,7 +16,9 @@ use std::assert_matches::assert_matches;
 use std::collections::HashMap;
 use std::sync::Arc;

+use api::region::RegionResponse;
 use api::v1::alter_table_expr::Kind;
+use api::v1::region::sync_request::ManifestInfo;
 use api::v1::region::{region_request, RegionRequest};
 use api::v1::{
    AddColumn, AddColumns, AlterTableExpr, ColumnDataType, ColumnDef as PbColumnDef, DropColumn,
@@ -28,6 +30,8 @@ use common_error::status_code::StatusCode;
 use common_procedure::store::poison_store::PoisonStore;
 use common_procedure::{ProcedureId, Status};
 use common_procedure_test::MockContextProvider;
+use store_api::metric_engine_consts::MANIFEST_INFO_EXTENSION_KEY;
+use store_api::region_engine::RegionManifestInfo;
 use store_api::storage::RegionId;
 use table::requests::TTL_KEY;
 use tokio::sync::mpsc::{self};
@@ -39,7 +43,7 @@ use crate::ddl::test_util::datanode_handler::{
    AllFailureDatanodeHandler, DatanodeWatcher, PartialSuccessDatanodeHandler,
    RequestOutdatedErrorDatanodeHandler,
 };
-use crate::error::Error;
+use crate::error::{Error, Result};
 use crate::key::datanode_table::DatanodeTableKey;
 use crate::key::table_name::TableNameKey;
 use crate::key::table_route::TableRouteValue;
@@ -120,10 +124,71 @@ async fn test_on_prepare_table_not_exists_err() {
    assert_matches!(err.status_code(), StatusCode::TableNotFound);
 }

+fn test_alter_table_task(table_name: &str) -> AlterTableTask {
+    AlterTableTask {
+        alter_table: AlterTableExpr {
+            catalog_name: DEFAULT_CATALOG_NAME.to_string(),
+            schema_name: DEFAULT_SCHEMA_NAME.to_string(),
+            table_name: table_name.to_string(),
+            kind: Some(Kind::DropColumns(DropColumns {
+                drop_columns: vec![DropColumn {
+                    name: "cpu".to_string(),
+                }],
+            })),
+        },
+    }
+}
+
+fn assert_alter_request(
+    peer: Peer,
+    request: RegionRequest,
+    expected_peer_id: u64,
+    expected_region_id: RegionId,
+) {
+    assert_eq!(peer.id, expected_peer_id);
+    let Some(region_request::Body::Alter(req)) = request.body else {
+        unreachable!();
+    };
+    assert_eq!(req.region_id, expected_region_id);
+}
+
+fn assert_sync_request(
+    peer: Peer,
+    request: RegionRequest,
+    expected_peer_id: u64,
+    expected_region_id: RegionId,
+    expected_manifest_version: u64,
+) {
+    assert_eq!(peer.id, expected_peer_id);
+    let Some(region_request::Body::Sync(req)) = request.body else {
+        unreachable!();
+    };
+    let Some(ManifestInfo::MitoManifestInfo(info)) = req.manifest_info else {
+        unreachable!();
+    };
+    assert_eq!(info.data_manifest_version, expected_manifest_version);
+    assert_eq!(req.region_id, expected_region_id);
+}
+
+fn alter_request_handler(_peer: Peer, request: RegionRequest) -> Result<RegionResponse> {
+    if let region_request::Body::Alter(req) = request.body.unwrap() {
+        let mut response = RegionResponse::new(0);
+        let region_id = RegionId::from(req.region_id);
+        response.extensions.insert(
+            MANIFEST_INFO_EXTENSION_KEY.to_string(),
+            RegionManifestInfo::encode_list(&[(region_id, RegionManifestInfo::mito(1, 1))])
+                .unwrap(),
+        );
+        return Ok(response);
+    }
+
+    Ok(RegionResponse::new(0))
+}
+
 #[tokio::test]
 async fn test_on_submit_alter_request() {
    let (tx, mut rx) = mpsc::channel(8);
-    let datanode_handler = DatanodeWatcher(tx);
+    let datanode_handler = DatanodeWatcher::new(tx).with_handler(alter_request_handler);
    let node_manager = Arc::new(MockDatanodeManager::new(datanode_handler));
    let ddl_context = new_ddl_context(node_manager);
    let table_id = 1024;
@@ -140,18 +205,7 @@ async fn test_on_submit_alter_request() {
        .await
        .unwrap();

-    let alter_table_task = AlterTableTask {
-        alter_table: AlterTableExpr {
-            catalog_name: DEFAULT_CATALOG_NAME.to_string(),
-            schema_name: DEFAULT_SCHEMA_NAME.to_string(),
-            table_name: table_name.to_string(),
-            kind: Some(Kind::DropColumns(DropColumns {
-                drop_columns: vec![DropColumn {
-                    name: "cpu".to_string(),
-                }],
-            })),
-        },
-    };
+    let alter_table_task = test_alter_table_task(table_name);
    let procedure_id = ProcedureId::random();
    let provider = Arc::new(MockContextProvider::default());
    let mut procedure =
@@ -162,30 +216,72 @@ async fn test_on_submit_alter_request() {
        .await
        .unwrap();

-    let check = |peer: Peer,
-                 request: RegionRequest,
-                 expected_peer_id: u64,
-                 expected_region_id: RegionId| {
-        assert_eq!(peer.id, expected_peer_id);
-        let Some(region_request::Body::Alter(req)) = request.body else {
-            unreachable!();
-        };
-        assert_eq!(req.region_id, expected_region_id);
-    };
+    let mut results = Vec::new();
+    for _ in 0..5 {
+        let result = rx.try_recv().unwrap();
+        results.push(result);
+    }
+    rx.try_recv().unwrap_err();
+    results.sort_unstable_by(|(a, _), (b, _)| a.id.cmp(&b.id));
+
+    let (peer, request) = results.remove(0);
+    assert_alter_request(peer, request, 1, RegionId::new(table_id, 1));
+    let (peer, request) = results.remove(0);
+    assert_alter_request(peer, request, 2, RegionId::new(table_id, 2));
+    let (peer, request) = results.remove(0);
+    assert_alter_request(peer, request, 3, RegionId::new(table_id, 3));
+    let (peer, request) = results.remove(0);
+    assert_sync_request(peer, request, 4, RegionId::new(table_id, 2), 1);
+    let (peer, request) = results.remove(0);
+    assert_sync_request(peer, request, 5, RegionId::new(table_id, 1), 1);
+}
+
+#[tokio::test]
+async fn test_on_submit_alter_request_without_sync_request() {
+    let (tx, mut rx) = mpsc::channel(8);
+    // Does not use `alter_request_handler`, so no sync request will be sent.
+    let datanode_handler = DatanodeWatcher::new(tx);
+    let node_manager = Arc::new(MockDatanodeManager::new(datanode_handler));
+    let ddl_context = new_ddl_context(node_manager);
+    let table_id = 1024;
+    let table_name = "foo";
+    let task = test_create_table_task(table_name, table_id);
+    // Puts a value to table name key.
+    ddl_context
+        .table_metadata_manager
+        .create_table_metadata(
+            task.table_info.clone(),
+            prepare_table_route(table_id),
+            HashMap::new(),
+        )
+        .await
+        .unwrap();
+
+    let alter_table_task = test_alter_table_task(table_name);
+    let procedure_id = ProcedureId::random();
+    let provider = Arc::new(MockContextProvider::default());
+    let mut procedure =
+        AlterTableProcedure::new(table_id, alter_table_task, ddl_context.clone()).unwrap();
+    procedure.on_prepare().await.unwrap();
+    procedure
+        .submit_alter_region_requests(procedure_id, provider.as_ref())
+        .await
+        .unwrap();

    let mut results = Vec::new();
    for _ in 0..3 {
        let result = rx.try_recv().unwrap();
        results.push(result);
    }
+    rx.try_recv().unwrap_err();
    results.sort_unstable_by(|(a, _), (b, _)| a.id.cmp(&b.id));

    let (peer, request) = results.remove(0);
-    check(peer, request, 1, RegionId::new(table_id, 1));
+    assert_alter_request(peer, request, 1, RegionId::new(table_id, 1));
    let (peer, request) = results.remove(0);
-    check(peer, request, 2, RegionId::new(table_id, 2));
+    assert_alter_request(peer, request, 2, RegionId::new(table_id, 2));
    let (peer, request) = results.remove(0);
-    check(peer, request, 3, RegionId::new(table_id, 3));
+    assert_alter_request(peer, request, 3, RegionId::new(table_id, 3));
 }

 #[tokio::test]
--- a/src/common/meta/src/ddl/tests/create_flow.rs
+++ b/src/common/meta/src/ddl/tests/create_flow.rs
@@ -46,7 +46,7 @@ pub(crate) fn test_create_flow_task(
        create_if_not_exists,
        expire_after: Some(300),
        comment: "".to_string(),
-        sql: "raw_sql".to_string(),
+        sql: "select 1".to_string(),
        flow_options: Default::default(),
    }
 }
--- a/src/common/meta/src/ddl/tests/create_logical_tables.rs
+++ b/src/common/meta/src/ddl/tests/create_logical_tables.rs
@@ -15,20 +15,28 @@
 use std::assert_matches::assert_matches;
 use std::sync::Arc;

+use api::region::RegionResponse;
+use api::v1::meta::Peer;
+use api::v1::region::sync_request::ManifestInfo;
+use api::v1::region::{region_request, MetricManifestInfo, RegionRequest, SyncRequest};
 use common_error::ext::ErrorExt;
 use common_error::status_code::StatusCode;
 use common_procedure::{Context as ProcedureContext, Procedure, ProcedureId, Status};
 use common_procedure_test::MockContextProvider;
+use store_api::metric_engine_consts::MANIFEST_INFO_EXTENSION_KEY;
+use store_api::region_engine::RegionManifestInfo;
 use store_api::storage::RegionId;
+use tokio::sync::mpsc;

 use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure;
-use crate::ddl::test_util::datanode_handler::NaiveDatanodeHandler;
+use crate::ddl::test_util::datanode_handler::{DatanodeWatcher, NaiveDatanodeHandler};
 use crate::ddl::test_util::{
    create_physical_table_metadata, test_create_logical_table_task, test_create_physical_table_task,
 };
 use crate::ddl::TableMetadata;
-use crate::error::Error;
-use crate::key::table_route::TableRouteValue;
+use crate::error::{Error, Result};
+use crate::key::table_route::{PhysicalTableRouteValue, TableRouteValue};
+use crate::rpc::router::{Region, RegionRoute};
 use crate::test_util::{new_ddl_context, MockDatanodeManager};

 #[tokio::test]
@@ -390,3 +398,76 @@ async fn test_on_create_metadata_err() {
    let error = procedure.execute(&ctx).await.unwrap_err();
    assert!(!error.is_retry_later());
 }
+
+fn creates_request_handler(_peer: Peer, request: RegionRequest) -> Result<RegionResponse> {
+    if let region_request::Body::Creates(_) = request.body.unwrap() {
+        let mut response = RegionResponse::new(0);
+        // Default region id for physical table.
+        let region_id = RegionId::new(1024, 1);
+        response.extensions.insert(
+            MANIFEST_INFO_EXTENSION_KEY.to_string(),
+            RegionManifestInfo::encode_list(&[(region_id, RegionManifestInfo::metric(1, 0, 2, 0))])
+                .unwrap(),
+        );
+        return Ok(response);
+    }
+
+    Ok(RegionResponse::new(0))
+}
+
+#[tokio::test]
+async fn test_on_submit_create_request() {
+    common_telemetry::init_default_ut_logging();
+    let (tx, mut rx) = mpsc::channel(8);
+    let handler = DatanodeWatcher::new(tx).with_handler(creates_request_handler);
+    let node_manager = Arc::new(MockDatanodeManager::new(handler));
+    let ddl_context = new_ddl_context(node_manager);
+    let mut create_physical_table_task = test_create_physical_table_task("phy_table");
+    let table_id = 1024u32;
+    let region_routes = vec![RegionRoute {
+        region: Region::new_test(RegionId::new(table_id, 1)),
+        leader_peer: Some(Peer::empty(1)),
+        follower_peers: vec![Peer::empty(5)],
+        leader_state: None,
+        leader_down_since: None,
+    }];
+    create_physical_table_task.set_table_id(table_id);
+    create_physical_table_metadata(
+        &ddl_context,
+        create_physical_table_task.table_info.clone(),
+        TableRouteValue::Physical(PhysicalTableRouteValue::new(region_routes)),
+    )
+    .await;
+    let physical_table_id = table_id;
+    let task = test_create_logical_table_task("foo");
+    let yet_another_task = test_create_logical_table_task("bar");
+    let mut procedure = CreateLogicalTablesProcedure::new(
+        vec![task, yet_another_task],
+        physical_table_id,
+        ddl_context,
+    );
+    procedure.on_prepare().await.unwrap();
+    procedure.on_datanode_create_regions().await.unwrap();
+    let mut results = Vec::new();
+    for _ in 0..2 {
+        let result = rx.try_recv().unwrap();
+        results.push(result);
+    }
+    rx.try_recv().unwrap_err();
+    let (peer, request) = results.remove(0);
+    assert_eq!(peer.id, 1);
+    assert_matches!(request.body.unwrap(), region_request::Body::Creates(_));
+    let (peer, request) = results.remove(0);
+    assert_eq!(peer.id, 5);
+    assert_matches!(
+        request.body.unwrap(),
+        region_request::Body::Sync(SyncRequest {
+            manifest_info: Some(ManifestInfo::MetricManifestInfo(MetricManifestInfo {
+                data_manifest_version: 1,
+                metadata_manifest_version: 2,
+                ..
+            })),
+            ..
+        })
+    );
+}
--- a/src/common/meta/src/ddl/tests/drop_table.rs
+++ b/src/common/meta/src/ddl/tests/drop_table.rs
@@ -100,7 +100,7 @@ async fn test_on_prepare_table() {
 #[tokio::test]
 async fn test_on_datanode_drop_regions() {
    let (tx, mut rx) = mpsc::channel(8);
-    let datanode_handler = DatanodeWatcher(tx);
+    let datanode_handler = DatanodeWatcher::new(tx);
    let node_manager = Arc::new(MockDatanodeManager::new(datanode_handler));
    let ddl_context = new_ddl_context(node_manager);
    let table_id = 1024;
@@ -148,27 +148,39 @@ async fn test_on_datanode_drop_regions() {
    let check = |peer: Peer,
                 request: RegionRequest,
                 expected_peer_id: u64,
-                 expected_region_id: RegionId| {
+                 expected_region_id: RegionId,
+                 follower: bool| {
        assert_eq!(peer.id, expected_peer_id);
-        let Some(region_request::Body::Drop(req)) = request.body else {
-            unreachable!();
+        if follower {
+            let Some(region_request::Body::Close(req)) = request.body else {
+                unreachable!();
+            };
+            assert_eq!(req.region_id, expected_region_id);
+        } else {
+            let Some(region_request::Body::Drop(req)) = request.body else {
+                unreachable!();
+            };
+            assert_eq!(req.region_id, expected_region_id);
        };
-        assert_eq!(req.region_id, expected_region_id);
    };

    let mut results = Vec::new();
-    for _ in 0..3 {
+    for _ in 0..5 {
        let result = rx.try_recv().unwrap();
        results.push(result);
    }
    results.sort_unstable_by(|(a, _), (b, _)| a.id.cmp(&b.id));

    let (peer, request) = results.remove(0);
-    check(peer, request, 1, RegionId::new(table_id, 1));
+    check(peer, request, 1, RegionId::new(table_id, 1), false);
    let (peer, request) = results.remove(0);
-    check(peer, request, 2, RegionId::new(table_id, 2));
+    check(peer, request, 2, RegionId::new(table_id, 2), false);
    let (peer, request) = results.remove(0);
-    check(peer, request, 3, RegionId::new(table_id, 3));
+    check(peer, request, 3, RegionId::new(table_id, 3), false);
+    let (peer, request) = results.remove(0);
+    check(peer, request, 4, RegionId::new(table_id, 2), true);
+    let (peer, request) = results.remove(0);
+    check(peer, request, 5, RegionId::new(table_id, 1), true);
 }

 #[tokio::test]
--- a/src/common/meta/src/ddl/utils.rs
+++ b/src/common/meta/src/ddl/utils.rs
@@ -15,27 +15,37 @@
 use std::collections::HashMap;
 use std::fmt::Debug;

-use common_catalog::consts::METRIC_ENGINE;
+use api::region::RegionResponse;
+use api::v1::region::sync_request::ManifestInfo;
+use api::v1::region::{
+    region_request, MetricManifestInfo, MitoManifestInfo, RegionRequest, RegionRequestHeader,
+    SyncRequest,
+};
+use common_catalog::consts::{METRIC_ENGINE, MITO_ENGINE};
 use common_error::ext::BoxedError;
 use common_procedure::error::Error as ProcedureError;
-use common_telemetry::{error, warn};
+use common_telemetry::tracing_context::TracingContext;
+use common_telemetry::{error, info, warn};
 use common_wal::options::WalOptions;
+use futures::future::join_all;
 use snafu::{ensure, OptionExt, ResultExt};
-use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
-use store_api::storage::RegionNumber;
+use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, MANIFEST_INFO_EXTENSION_KEY};
+use store_api::region_engine::RegionManifestInfo;
+use store_api::storage::{RegionId, RegionNumber};
 use table::metadata::TableId;
 use table::table_reference::TableReference;

-use crate::ddl::DetectingRegion;
+use crate::ddl::{DdlContext, DetectingRegion};
 use crate::error::{
-    Error, OperateDatanodeSnafu, ParseWalOptionsSnafu, Result, TableNotFoundSnafu, UnsupportedSnafu,
+    self, Error, OperateDatanodeSnafu, ParseWalOptionsSnafu, Result, TableNotFoundSnafu,
+    UnsupportedSnafu,
 };
 use crate::key::datanode_table::DatanodeTableValue;
 use crate::key::table_name::TableNameKey;
 use crate::key::TableMetadataManagerRef;
 use crate::peer::Peer;
 use crate::rpc::ddl::CreateTableTask;
-use crate::rpc::router::RegionRoute;
+use crate::rpc::router::{find_follower_regions, find_followers, RegionRoute};

 /// Adds [Peer] context if the error is unretryable.
 pub fn add_peer_context_if_needed(datanode: Peer) -> impl FnOnce(Error) -> Error {
@@ -192,8 +202,8 @@ pub fn extract_region_wal_options(
 /// - PartialNonRetryable: if any operation is non retryable, the result is non retryable.
 /// - AllRetryable: all operations are retryable.
 /// - AllNonRetryable: all operations are not retryable.
-pub enum MultipleResults {
-    Ok,
+pub enum MultipleResults<T> {
+    Ok(Vec<T>),
    PartialRetryable(Error),
    PartialNonRetryable(Error),
    AllRetryable(Error),
@@ -205,9 +215,9 @@ pub enum MultipleResults {
 /// For partial success, we need to check if the errors are retryable.
 /// If all the errors are retryable, we return a retryable error.
 /// Otherwise, we return the first error.
-pub fn handle_multiple_results<T: Debug>(results: Vec<Result<T>>) -> MultipleResults {
+pub fn handle_multiple_results<T: Debug>(results: Vec<Result<T>>) -> MultipleResults<T> {
    if results.is_empty() {
-        return MultipleResults::Ok;
+        return MultipleResults::Ok(Vec::new());
    }
    let num_results = results.len();
    let mut retryable_results = Vec::new();
@@ -216,7 +226,7 @@ pub fn handle_multiple_results<T: Debug>(results: Vec<Result<T>>) -> MultipleRes

    for result in results {
        match result {
-            Ok(_) => ok_results.push(result),
+            Ok(value) => ok_results.push(value),
            Err(err) => {
                if err.is_retry_later() {
                    retryable_results.push(err);
@@ -243,7 +253,7 @@ pub fn handle_multiple_results<T: Debug>(results: Vec<Result<T>>) -> MultipleRes
        }
        return MultipleResults::AllNonRetryable(non_retryable_results.into_iter().next().unwrap());
    } else if ok_results.len() == num_results {
-        return MultipleResults::Ok;
+        return MultipleResults::Ok(ok_results);
    } else if !retryable_results.is_empty()
        && !ok_results.is_empty()
        && non_retryable_results.is_empty()
@@ -264,6 +274,125 @@ pub fn handle_multiple_results<T: Debug>(results: Vec<Result<T>>) -> MultipleRes
    MultipleResults::PartialNonRetryable(non_retryable_results.into_iter().next().unwrap())
 }

+/// Decodes the region manifest infos carried in the `extensions` of a region response.
+///
+/// The payload stored under [MANIFEST_INFO_EXTENSION_KEY] is decoded with
+/// [RegionManifestInfo::decode_list].
+///
+/// # Errors
+/// Returns an `Unexpected` error if the extension is absent, or a `SerdeJson` error if
+/// the payload cannot be decoded.
+pub fn parse_manifest_infos_from_extensions(
+    extensions: &HashMap<String, Vec<u8>>,
+) -> Result<Vec<(RegionId, RegionManifestInfo)>> {
+    let encoded_manifest_infos =
+        extensions
+            .get(MANIFEST_INFO_EXTENSION_KEY)
+            .context(error::UnexpectedSnafu {
+                err_msg: "manifest info extension not found",
+            })?;
+
+    RegionManifestInfo::decode_list(encoded_manifest_infos).context(error::SerdeJsonSnafu {})
+}
+
+/// Syncs follower regions on datanodes, best effort.
+///
+/// The manifest infos are decoded from the `extensions` of `results` and a `SyncRequest`
+/// is sent to every datanode that hosts a follower of one of `region_routes`.
+///
+/// Nothing is submitted when `engine` is neither [MITO_ENGINE] nor [METRIC_ENGINE], or
+/// when the routes contain no follower peers.
+///
+/// # Errors
+/// Returns an error if a response carries no decodable manifest info, if a follower region
+/// has no manifest info, or if the manifest info does not match `engine`.
+/// Failures of the sync requests themselves are only logged.
+pub async fn sync_follower_regions(
+    context: &DdlContext,
+    table_id: TableId,
+    results: Vec<RegionResponse>,
+    region_routes: &[RegionRoute],
+    engine: &str,
+) -> Result<()> {
+    if engine != MITO_ENGINE && engine != METRIC_ENGINE {
+        info!(
+            "Skip submitting sync region requests for table_id: {}, engine: {}",
+            table_id, engine
+        );
+        return Ok(());
+    }
+
+    let results = results
+        .into_iter()
+        .map(|response| parse_manifest_infos_from_extensions(&response.extensions))
+        .collect::<Result<Vec<_>>>()?
+        .into_iter()
+        .flatten()
+        .collect::<HashMap<_, _>>();
+
+    let is_mito_engine = engine == MITO_ENGINE;
+
+    let followers = find_followers(region_routes);
+    if followers.is_empty() {
+        return Ok(());
+    }
+    let mut sync_region_tasks = Vec::with_capacity(followers.len());
+    for datanode in followers {
+        let requester = context.node_manager.datanode(&datanode).await;
+        let regions = find_follower_regions(region_routes, &datanode);
+        for region in regions {
+            let region_id = RegionId::new(table_id, region);
+            // Both engines need the manifest info of the region; the error message is
+            // built lazily so the happy path does not allocate it.
+            let region_manifest_info =
+                results
+                    .get(&region_id)
+                    .with_context(|| error::UnexpectedSnafu {
+                        err_msg: format!("No manifest info found for region {}", region_id),
+                    })?;
+            let manifest_info = if is_mito_engine {
+                ensure!(
+                    region_manifest_info.is_mito(),
+                    error::UnexpectedSnafu {
+                        err_msg: format!("Region {} is not a mito region", region_id)
+                    }
+                );
+                ManifestInfo::MitoManifestInfo(MitoManifestInfo {
+                    data_manifest_version: region_manifest_info.data_manifest_version(),
+                })
+            } else {
+                ensure!(
+                    region_manifest_info.is_metric(),
+                    error::UnexpectedSnafu {
+                        err_msg: format!("Region {} is not a metric region", region_id)
+                    }
+                );
+                ManifestInfo::MetricManifestInfo(MetricManifestInfo {
+                    data_manifest_version: region_manifest_info.data_manifest_version(),
+                    metadata_manifest_version: region_manifest_info
+                        .metadata_manifest_version()
+                        .unwrap_or_default(),
+                })
+            };
+            let request = RegionRequest {
+                header: Some(RegionRequestHeader {
+                    tracing_context: TracingContext::from_current_span().to_w3c(),
+                    ..Default::default()
+                }),
+                body: Some(region_request::Body::Sync(SyncRequest {
+                    region_id: region_id.as_u64(),
+                    manifest_info: Some(manifest_info),
+                })),
+            };
+
+            let datanode = datanode.clone();
+            let requester = requester.clone();
+            sync_region_tasks.push(async move {
+                requester
+                    .handle(request)
+                    .await
+                    .map_err(add_peer_context_if_needed(datanode))
+            });
+        }
+    }
+
+    // Failure to sync region is not critical.
+    // We try our best to sync the regions, so the outcome is only logged;
+    // the success message is emitted only when every request succeeded.
+    match join_all(sync_region_tasks)
+        .await
+        .into_iter()
+        .collect::<Result<Vec<_>>>()
+    {
+        Ok(_) => info!("Sync follower regions on datanodes, table_id: {}", table_id),
+        Err(err) => {
+            error!(err; "Failed to sync follower regions on datanodes, table_id: {}", table_id);
+        }
+    }
+
+    Ok(())
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src/common/meta/src/error.rs
+++ b/src/common/meta/src/error.rs
@@ -401,6 +401,13 @@ pub enum Error {
        location: Location,
    },

+    #[snafu(display("Invalid flow request body: {:?}", body))]
+    InvalidFlowRequestBody {
+        body: Box<Option<api::v1::flow::flow_request::Body>>,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
    #[snafu(display("Failed to get kv cache, err: {}", err_msg))]
    GetKvCache { err_msg: String },

@@ -783,6 +790,14 @@ pub enum Error {
        #[snafu(source)]
        source: common_procedure::error::Error,
    },
+
+    #[snafu(display("Failed to parse timezone"))]
+    InvalidTimeZone {
+        #[snafu(implicit)]
+        location: Location,
+        #[snafu(source)]
+        error: common_time::error::Error,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -853,7 +868,9 @@ impl ErrorExt for Error {
            | TlsConfig { .. }
            | InvalidSetDatabaseOption { .. }
            | InvalidUnsetDatabaseOption { .. }
-            | InvalidTopicNamePrefix { .. } => StatusCode::InvalidArguments,
+            | InvalidTopicNamePrefix { .. }
+            | InvalidTimeZone { .. } => StatusCode::InvalidArguments,
+            InvalidFlowRequestBody { .. } => StatusCode::InvalidArguments,

            FlowNotFound { .. } => StatusCode::FlowNotFound,
            FlowRouteNotFound { .. } => StatusCode::Unexpected,
--- a/src/common/meta/src/instruction.rs
+++ b/src/common/meta/src/instruction.rs
@@ -57,6 +57,8 @@ impl Display for RegionIdent {
 pub struct DowngradeRegionReply {
    /// Returns the `last_entry_id` if available.
    pub last_entry_id: Option<u64>,
+    /// Returns the `metadata_last_entry_id` if available (Only available for metric engine).
+    pub metadata_last_entry_id: Option<u64>,
    /// Indicates whether the region exists.
    pub exists: bool,
    /// Return error if any during the operation.
@@ -136,16 +138,14 @@ pub struct DowngradeRegion {
    /// `None` stands for don't flush before downgrading the region.
    #[serde(default)]
    pub flush_timeout: Option<Duration>,
-    /// Rejects all write requests after flushing.
-    pub reject_write: bool,
 }

 impl Display for DowngradeRegion {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
-            "DowngradeRegion(region_id={}, flush_timeout={:?}, rejct_write={})",
-            self.region_id, self.flush_timeout, self.reject_write
+            "DowngradeRegion(region_id={}, flush_timeout={:?})",
+            self.region_id, self.flush_timeout,
        )
    }
 }
@@ -157,6 +157,8 @@ pub struct UpgradeRegion {
    pub region_id: RegionId,
    /// The `last_entry_id` of old leader region.
    pub last_entry_id: Option<u64>,
+    /// The `last_entry_id` of old leader metadata region (Only used for metric engine).
+    pub metadata_last_entry_id: Option<u64>,
    /// The timeout of waiting for a wal replay.
    ///
    /// `None` stands for no wait,
--- a/src/common/meta/src/key/flow.rs
+++ b/src/common/meta/src/key/flow.rs
@@ -452,6 +452,7 @@ mod tests {
        };
        FlowInfoValue {
            catalog_name: catalog_name.to_string(),
+            query_context: None,
            flow_name: flow_name.to_string(),
            source_table_ids,
            sink_table_name,
@@ -625,6 +626,7 @@ mod tests {
        };
        let flow_value = FlowInfoValue {
            catalog_name: "greptime".to_string(),
+            query_context: None,
            flow_name: "flow".to_string(),
            source_table_ids: vec![1024, 1025, 1026],
            sink_table_name: another_sink_table_name,
@@ -864,6 +866,7 @@ mod tests {
        };
        let flow_value = FlowInfoValue {
            catalog_name: "greptime".to_string(),
+            query_context: None,
            flow_name: "flow".to_string(),
            source_table_ids: vec![1024, 1025, 1026],
            sink_table_name: another_sink_table_name,
--- a/src/common/meta/src/key/flow/flow_info.rs
+++ b/src/common/meta/src/key/flow/flow_info.rs
@@ -121,6 +121,13 @@ pub struct FlowInfoValue {
    pub(crate) flownode_ids: BTreeMap<FlowPartitionId, FlownodeId>,
    /// The catalog name.
    pub(crate) catalog_name: String,
+    /// The query context used when the flow was created.
+    /// Although a flow doesn't belong to any schema, this query context is stored to remember
+    /// the context in which `create_flow` was executed,
+    /// so the flow can be recovered with the same SQL and query context after a db restart.
+    /// If `None`, the default query context should be used.
+    #[serde(default)]
+    pub(crate) query_context: Option<crate::rpc::ddl::QueryContext>,
    /// The flow name.
    pub(crate) flow_name: String,
    /// The raw sql.
@@ -155,6 +162,10 @@ impl FlowInfoValue {
        &self.catalog_name
    }

+    /// Returns the stored query context, or `None` if no query context was recorded.
+    pub fn query_context(&self) -> &Option<crate::rpc::ddl::QueryContext> {
+        &self.query_context
+    }
+
    pub fn flow_name(&self) -> &String {
        &self.flow_name
    }
--- a/src/common/meta/src/lib.rs
+++ b/src/common/meta/src/lib.rs
@@ -15,8 +15,6 @@
 #![feature(assert_matches)]
 #![feature(btree_extract_if)]
 #![feature(let_chains)]
-#![feature(extract_if)]
-#![feature(hash_extract_if)]

 pub mod cache;
 pub mod cache_invalidator;
--- a/src/common/meta/src/region_registry.rs
+++ b/src/common/meta/src/region_registry.rs
@@ -19,7 +19,7 @@ use std::sync::{Arc, RwLock};
 use common_telemetry::warn;
 use store_api::storage::RegionId;

-use crate::datanode::RegionManifestInfo;
+use crate::datanode::{RegionManifestInfo, RegionStat};

 /// Represents information about a leader region in the cluster.
 /// Contains the datanode id where the leader is located,
@@ -35,25 +35,22 @@ pub enum LeaderRegionManifestInfo {
    Mito {
        manifest_version: u64,
        flushed_entry_id: u64,
+        topic_latest_entry_id: u64,
    },
    Metric {
        data_manifest_version: u64,
        data_flushed_entry_id: u64,
+        data_topic_latest_entry_id: u64,
        metadata_manifest_version: u64,
        metadata_flushed_entry_id: u64,
+        metadata_topic_latest_entry_id: u64,
    },
 }

-impl From<RegionManifestInfo> for LeaderRegionManifestInfo {
-    fn from(value: RegionManifestInfo) -> Self {
-        match value {
-            RegionManifestInfo::Mito {
-                manifest_version,
-                flushed_entry_id,
-            } => LeaderRegionManifestInfo::Mito {
-                manifest_version,
-                flushed_entry_id,
-            },
+impl LeaderRegionManifestInfo {
+    /// Generate a [LeaderRegionManifestInfo] from [RegionStat].
+    pub fn from_region_stat(region_stat: &RegionStat) -> LeaderRegionManifestInfo {
+        match region_stat.region_manifest {
            RegionManifestInfo::Metric {
                data_manifest_version,
                data_flushed_entry_id,
@@ -62,14 +59,22 @@ impl From<RegionManifestInfo> for LeaderRegionManifestInfo {
            } => LeaderRegionManifestInfo::Metric {
                data_manifest_version,
                data_flushed_entry_id,
+                data_topic_latest_entry_id: region_stat.data_topic_latest_entry_id,
                metadata_manifest_version,
                metadata_flushed_entry_id,
+                metadata_topic_latest_entry_id: region_stat.metadata_topic_latest_entry_id,
+            },
+            RegionManifestInfo::Mito {
+                manifest_version,
+                flushed_entry_id,
+            } => LeaderRegionManifestInfo::Mito {
+                manifest_version,
+                flushed_entry_id,
+                topic_latest_entry_id: region_stat.data_topic_latest_entry_id,
            },
        }
    }
-}

-impl LeaderRegionManifestInfo {
    /// Returns the manifest version of the leader region.
    pub fn manifest_version(&self) -> u64 {
        match self {
@@ -96,18 +101,35 @@ impl LeaderRegionManifestInfo {
        }
    }

-    /// Returns the minimum flushed entry id of the leader region.
-    /// It is used to determine the minimum flushed entry id that can be pruned in remote wal.
-    pub fn min_flushed_entry_id(&self) -> u64 {
+    /// Returns the prunable entry id of the leader region.
+    /// It is used to determine the entry id that can be pruned in remote wal.
+    ///
+    /// For a mito region, the prunable entry id is max(flushed_entry_id, topic_latest_entry_id).
+    ///
+    /// For a metric region, the prunable entry id is min(
+    ///     max(data_flushed_entry_id, data_topic_latest_entry_id),
+    ///     max(metadata_flushed_entry_id, metadata_topic_latest_entry_id)
+    /// ).
+    pub fn prunable_entry_id(&self) -> u64 {
        match self {
            LeaderRegionManifestInfo::Mito {
-                flushed_entry_id, ..
-            } => *flushed_entry_id,
+                flushed_entry_id,
+                topic_latest_entry_id,
+                ..
+            } => (*flushed_entry_id).max(*topic_latest_entry_id),
            LeaderRegionManifestInfo::Metric {
                data_flushed_entry_id,
+                data_topic_latest_entry_id,
                metadata_flushed_entry_id,
+                metadata_topic_latest_entry_id,
                ..
-            } => (*data_flushed_entry_id).min(*metadata_flushed_entry_id),
+            } => {
+                let data_prunable_entry_id =
+                    (*data_flushed_entry_id).max(*data_topic_latest_entry_id);
+                let metadata_prunable_entry_id =
+                    (*metadata_flushed_entry_id).max(*metadata_topic_latest_entry_id);
+                data_prunable_entry_id.min(metadata_prunable_entry_id)
+            }
        }
    }
 }
--- a/src/common/meta/src/rpc/ddl.rs
+++ b/src/common/meta/src/rpc/ddl.rs
@@ -35,17 +35,20 @@ use api::v1::{
 };
 use base64::engine::general_purpose;
 use base64::Engine as _;
-use common_time::DatabaseTimeToLive;
+use common_time::{DatabaseTimeToLive, Timezone};
 use prost::Message;
 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DefaultOnNull};
-use session::context::QueryContextRef;
+use session::context::{QueryContextBuilder, QueryContextRef};
 use snafu::{OptionExt, ResultExt};
 use table::metadata::{RawTableInfo, TableId};
 use table::table_name::TableName;
 use table::table_reference::TableReference;

-use crate::error::{self, InvalidSetDatabaseOptionSnafu, InvalidUnsetDatabaseOptionSnafu, Result};
+use crate::error::{
+    self, InvalidSetDatabaseOptionSnafu, InvalidTimeZoneSnafu, InvalidUnsetDatabaseOptionSnafu,
+    Result,
+};
 use crate::key::FlowId;

 /// DDL tasks
@@ -1202,7 +1205,7 @@ impl From<DropFlowTask> for PbDropFlowTask {
    }
 }

-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 pub struct QueryContext {
    current_catalog: String,
    current_schema: String,
@@ -1223,6 +1226,19 @@ impl From<QueryContextRef> for QueryContext {
    }
 }

+/// Rebuilds a [session::context::QueryContext] from its serializable counterpart.
+///
+/// Fails with an `InvalidTimeZone` error if the stored timezone string cannot be parsed.
+impl TryFrom<QueryContext> for session::context::QueryContext {
+    type Error = error::Error;
+
+    fn try_from(value: QueryContext) -> std::result::Result<Self, Self::Error> {
+        // Parsing the timezone is the only fallible step, so do it before building.
+        let timezone = Timezone::from_tz_string(&value.timezone).context(InvalidTimeZoneSnafu)?;
+
+        let query_context = QueryContextBuilder::default()
+            .current_catalog(value.current_catalog)
+            .current_schema(value.current_schema)
+            .timezone(timezone)
+            .extensions(value.extensions)
+            .channel((value.channel as u32).into())
+            .build();
+        Ok(query_context)
+    }
+}
+
 impl From<QueryContext> for PbQueryContext {
    fn from(
        QueryContext {
--- a/src/common/meta/src/rpc/router.rs
+++ b/src/common/meta/src/rpc/router.rs
@@ -32,6 +32,10 @@ use crate::key::RegionDistribution;
 use crate::peer::Peer;
 use crate::DatanodeId;

+/// Returns the distribution of regions to datanodes.
+///
+/// The distribution is a map of datanode id to a list of region numbers.
+/// Each list of region numbers is sorted in ascending order.
 pub fn region_distribution(region_routes: &[RegionRoute]) -> RegionDistribution {
    let mut regions_id_map = RegionDistribution::new();
    for route in region_routes.iter() {
@@ -39,6 +43,10 @@ pub fn region_distribution(region_routes: &[RegionRoute]) -> RegionDistribution
            let region_id = route.region.id.region_number();
            regions_id_map.entry(peer.id).or_default().push(region_id);
        }
+        for peer in route.follower_peers.iter() {
+            let region_id = route.region.id.region_number();
+            regions_id_map.entry(peer.id).or_default().push(region_id);
+        }
    }
    for (_, regions) in regions_id_map.iter_mut() {
        // id asc
@@ -54,6 +62,7 @@ pub struct TableRoute {
    region_leaders: HashMap<RegionNumber, Option<Peer>>,
 }

+/// Returns the leader peers of the table.
 pub fn find_leaders(region_routes: &[RegionRoute]) -> HashSet<Peer> {
    region_routes
        .iter()
@@ -62,6 +71,15 @@ pub fn find_leaders(region_routes: &[RegionRoute]) -> HashSet<Peer> {
        .collect()
 }

+/// Collects the distinct follower peers referenced by `region_routes`.
+pub fn find_followers(region_routes: &[RegionRoute]) -> HashSet<Peer> {
+    let mut followers = HashSet::new();
+    for route in region_routes {
+        followers.extend(route.follower_peers.iter().cloned());
+    }
+    followers
+}
+
 /// Returns the operating leader regions with corresponding [DatanodeId].
 pub fn operating_leader_regions(region_routes: &[RegionRoute]) -> Vec<(RegionId, DatanodeId)> {
    region_routes
@@ -100,6 +118,7 @@ pub fn find_region_leader(
        .cloned()
 }

+/// Returns the region numbers of the leader regions on the target datanode.
 pub fn find_leader_regions(region_routes: &[RegionRoute], datanode: &Peer) -> Vec<RegionNumber> {
    region_routes
        .iter()
@@ -114,6 +133,19 @@ pub fn find_leader_regions(region_routes: &[RegionRoute], datanode: &Peer) -> Ve
        .collect()
 }

+/// Lists the region numbers of the routes that have `datanode` among their follower peers.
+///
+/// The numbers are returned in the order of `region_routes`.
+pub fn find_follower_regions(region_routes: &[RegionRoute], datanode: &Peer) -> Vec<RegionNumber> {
+    region_routes
+        .iter()
+        .filter(|route| route.follower_peers.contains(datanode))
+        .map(|route| route.region.id.region_number())
+        .collect()
+}
+
 impl TableRoute {
    pub fn new(table: Table, region_routes: Vec<RegionRoute>) -> Self {
        let region_leaders = region_routes
@@ -144,15 +176,12 @@ impl TableRoute {
                })?
                .into();

-            let leader_peer = peers
-                .get(region_route.leader_peer_index as usize)
-                .cloned()
-                .map(Into::into);
+            let leader_peer = peers.get(region_route.leader_peer_index as usize).cloned();

            let follower_peers = region_route
                .follower_peer_indexes
                .into_iter()
-                .filter_map(|x| peers.get(x as usize).cloned().map(Into::into))
+                .filter_map(|x| peers.get(x as usize).cloned())
                .collect::<Vec<_>>();

            region_routes.push(RegionRoute {
@@ -550,4 +579,40 @@ mod tests {

        assert_eq!(got, p);
    }
+
+    #[test]
+    fn test_region_distribution() {
+        // Builds the route of region `region_number` of table 1 with the given peers.
+        let route = |region_number, name: &str, leader: Peer, followers: Vec<Peer>| RegionRoute {
+            region: Region {
+                id: RegionId::new(1, region_number),
+                name: name.to_string(),
+                partition: None,
+                attrs: BTreeMap::new(),
+            },
+            leader_peer: Some(leader),
+            follower_peers: followers,
+            leader_state: None,
+            leader_down_since: None,
+        };
+        let region_routes = vec![
+            route(1, "r1", Peer::new(1, "a1"), vec![Peer::new(2, "a2"), Peer::new(3, "a3")]),
+            route(2, "r2", Peer::new(2, "a2"), vec![Peer::new(1, "a1"), Peer::new(3, "a3")]),
+        ];
+
+        // Every datanode hosts both regions, either as leader or as follower.
+        let distribution = region_distribution(&region_routes);
+        assert_eq!(distribution.len(), 3);
+        assert_eq!(distribution[&1], vec![1, 2]);
+        assert_eq!(distribution[&2], vec![1, 2]);
+        assert_eq!(distribution[&3], vec![1, 2]);
+    }
 }
--- a/src/common/meta/src/wal_options_allocator.rs
+++ b/src/common/meta/src/wal_options_allocator.rs
@@ -30,7 +30,9 @@ use crate::error::{EncodeWalOptionsSnafu, InvalidTopicNamePrefixSnafu, Result};
 use crate::key::NAME_PATTERN_REGEX;
 use crate::kv_backend::KvBackendRef;
 use crate::leadership_notifier::LeadershipChangeListener;
-pub use crate::wal_options_allocator::topic_creator::build_kafka_topic_creator;
+pub use crate::wal_options_allocator::topic_creator::{
+    build_kafka_client, build_kafka_topic_creator,
+};
 use crate::wal_options_allocator::topic_pool::KafkaTopicPool;

 /// Allocates wal options in region granularity.
--- a/src/common/meta/src/wal_options_allocator/topic_creator.rs
+++ b/src/common/meta/src/wal_options_allocator/topic_creator.rs
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::sync::Arc;
-
 use common_telemetry::{error, info};
 use common_wal::config::kafka::common::DEFAULT_BACKOFF_CONFIG;
 use common_wal::config::kafka::MetasrvKafkaConfig;
@@ -34,11 +32,9 @@ use crate::error::{
 // The `DEFAULT_PARTITION` refers to the index of the partition.
 const DEFAULT_PARTITION: i32 = 0;

-type KafkaClientRef = Arc<Client>;
-
 /// Creates topics in kafka.
 pub struct KafkaTopicCreator {
-    client: KafkaClientRef,
+    client: Client,
    /// The number of partitions per topic.
    num_partitions: i32,
    /// The replication factor of each topic.
@@ -48,7 +44,7 @@ pub struct KafkaTopicCreator {
 }

 impl KafkaTopicCreator {
-    pub fn client(&self) -> &KafkaClientRef {
+    pub fn client(&self) -> &Client {
        &self.client
    }

@@ -133,7 +129,8 @@ impl KafkaTopicCreator {
    }
 }

-pub async fn build_kafka_topic_creator(config: &MetasrvKafkaConfig) -> Result<KafkaTopicCreator> {
+/// Builds a kafka [Client](rskafka::client::Client).
+pub async fn build_kafka_client(config: &MetasrvKafkaConfig) -> Result<Client> {
    // Builds an kafka controller client for creating topics.
    let broker_endpoints = common_wal::resolve_to_ipv4(&config.connection.broker_endpoints)
        .await
@@ -145,15 +142,19 @@ pub async fn build_kafka_topic_creator(config: &MetasrvKafkaConfig) -> Result<Ka
    if let Some(tls) = &config.connection.tls {
        builder = builder.tls_config(tls.to_tls_config().await.context(TlsConfigSnafu)?)
    };
-    let client = builder
+    builder
        .build()
        .await
        .with_context(|_| BuildKafkaClientSnafu {
            broker_endpoints: config.connection.broker_endpoints.clone(),
-        })?;
+        })
+}

+/// Builds a [KafkaTopicCreator].
+pub async fn build_kafka_topic_creator(config: &MetasrvKafkaConfig) -> Result<KafkaTopicCreator> {
+    let client = build_kafka_client(config).await?;
    Ok(KafkaTopicCreator {
-        client: Arc::new(client),
+        client,
        num_partitions: config.kafka_topic.num_partitions,
        replication_factor: config.kafka_topic.replication_factor,
        create_topic_timeout: config.kafka_topic.create_topic_timeout.as_millis() as i32,
--- a/src/common/procedure/src/store/state_store.rs
+++ b/src/common/procedure/src/store/state_store.rs
@@ -137,6 +137,7 @@ impl StateStore for ObjectStateStore {
                ))
            })
            .context(PutStateSnafu { key })
+            .map(|_| ())
    }

    async fn walk_top_down(&self, path: &str) -> Result<KeyValueStream> {
--- a/src/common/query/src/logical_plan.rs
+++ b/src/common/query/src/logical_plan.rs
@@ -18,16 +18,19 @@ mod udaf;

 use std::sync::Arc;

+use api::v1::TableName;
 use datafusion::catalog::CatalogProviderList;
 use datafusion::error::Result as DatafusionResult;
 use datafusion::logical_expr::{LogicalPlan, LogicalPlanBuilder};
-use datafusion_common::Column;
-use datafusion_expr::col;
+use datafusion_common::{Column, TableReference};
+use datafusion_expr::dml::InsertOp;
+use datafusion_expr::{col, DmlStatement, WriteOp};
 pub use expr::{build_filter_from_timestamp, build_same_type_ts_filter};
+use snafu::ResultExt;

 pub use self::accumulator::{Accumulator, AggregateFunctionCreator, AggregateFunctionCreatorRef};
 pub use self::udaf::AggregateFunction;
-use crate::error::Result;
+use crate::error::{GeneralDataFusionSnafu, Result};
 use crate::logical_plan::accumulator::*;
 use crate::signature::{Signature, Volatility};

@@ -79,6 +82,74 @@ pub fn rename_logical_plan_columns(
    LogicalPlanBuilder::from(plan).project(projection)?.build()
 }

+/// Splits an `insert into table_name <input>` logical plan into `(table_name, input)`.
+///
+/// Returns `None` if `plan` is not a DML statement whose operation is an appending insert.
+///
+/// Catalog and schema parts missing from the table reference of the plan are filled with
+/// `default_catalog` and `default_schema`; parts that are present are **NOT** overridden.
+pub fn breakup_insert_plan(
+    plan: &LogicalPlan,
+    default_catalog: &str,
+    default_schema: &str,
+) -> Option<(TableName, Arc<LogicalPlan>)> {
+    let LogicalPlan::Dml(dml) = plan else {
+        return None;
+    };
+    if dml.op != WriteOp::Insert(InsertOp::Append) {
+        return None;
+    }
+
+    // Resolve the (catalog, schema, table) triple, falling back to the defaults.
+    let (catalog_name, schema_name, table_name) = match &dml.table_name {
+        TableReference::Bare { table } => (
+            default_catalog.to_string(),
+            default_schema.to_string(),
+            table.to_string(),
+        ),
+        TableReference::Partial { schema, table } => (
+            default_catalog.to_string(),
+            schema.to_string(),
+            table.to_string(),
+        ),
+        TableReference::Full {
+            catalog,
+            schema,
+            table,
+        } => (catalog.to_string(), schema.to_string(), table.to_string()),
+    };
+    let target_table = TableName {
+        catalog_name,
+        schema_name,
+        table_name,
+    };
+
+    Some((target_table, dml.input.clone()))
+}
+
+/// Wraps `input` into an `insert into table_name <input>` logical plan.
+///
+/// The target table is referenced by its full name (catalog, schema and table) and the
+/// schema of the resulting plan is recomputed.
+///
+/// # Errors
+/// Returns a `GeneralDataFusion` error if recomputing the schema fails.
+pub fn add_insert_to_logical_plan(
+    table_name: TableName,
+    table_schema: datafusion_common::DFSchemaRef,
+    input: LogicalPlan,
+) -> Result<LogicalPlan> {
+    let TableName {
+        catalog_name,
+        schema_name,
+        table_name,
+    } = table_name;
+    let target = TableReference::Full {
+        catalog: catalog_name.into(),
+        schema: schema_name.into(),
+        table: table_name.into(),
+    };
+
+    let insert = DmlStatement::new(
+        target,
+        table_schema,
+        WriteOp::Insert(InsertOp::Append),
+        Arc::new(input),
+    );
+    LogicalPlan::Dml(insert)
+        .recompute_schema()
+        .context(GeneralDataFusionSnafu)
+}
+
 /// The datafusion `[LogicalPlan]` decoder.
 #[async_trait::async_trait]
 pub trait SubstraitPlanDecoder {
--- a/src/common/query/src/logical_plan/accumulator.rs
+++ b/src/common/query/src/logical_plan/accumulator.rs
@@ -24,7 +24,7 @@ use datatypes::prelude::*;
 use datatypes::vectors::{Helper as VectorHelper, VectorRef};
 use snafu::ResultExt;

-use crate::error::{self, Error, FromScalarValueSnafu, IntoVectorSnafu, Result};
+use crate::error::{self, FromScalarValueSnafu, IntoVectorSnafu, Result};
 use crate::prelude::*;

 pub type AggregateFunctionCreatorRef = Arc<dyn AggregateFunctionCreator>;
@@ -166,8 +166,7 @@ impl DfAccumulator for DfAccumulatorAdaptor {
        let output_type = self.creator.output_type()?;
        let scalar_value = value
            .try_to_scalar_value(&output_type)
-            .context(error::ToScalarValueSnafu)
-            .map_err(Error::from)?;
+            .context(error::ToScalarValueSnafu)?;
        Ok(scalar_value)
    }

--- a/Show More
+++ b/Show More