fix: close issue #6586 make pg also show error as mysql (#6587 )

* fix: close issue #6586 make pg also show error as mysql Signed-off-by: yihong0618 <zouzou0208@gmail.com> * fix: address comments Signed-off-by: yihong0618 <zouzou0208@gmail.com> * fix: drop useless debug print Signed-off-by: yihong0618 <zouzou0208@gmail.com> * fix: revert wrong change Signed-off-by: yihong0618 <zouzou0208@gmail.com> * refactor: convert error types * refactor: inline * chore: minimize changes * fix: address comments Signed-off-by: yihong0618 <zouzou0208@gmail.com> * fix: make clippy happy Signed-off-by: yihong0618 <zouzou0208@gmail.com> * fix: address comments Signed-off-by: yihong0618 <zouzou0208@gmail.com> * refactor: convert datafusion error to ErrorExt Signed-off-by: Ning Sun <sunning@greptime.com> * fix: headers ? Signed-off-by: yihong0618 <zouzou0208@gmail.com> --------- Signed-off-by: yihong0618 <zouzou0208@gmail.com> Signed-off-by: Ning Sun <sunning@greptime.com> Co-authored-by: Ning Sun <sunning@greptime.com>
fix: ignore target files in make fmt-check (#6560 )
2025-12-22 22:20:02 +00:00 · 2025-07-25 09:59:26 +00:00 · 2025-07-25 09:54:38 +00:00 · 2025-07-25 08:31:11 +00:00 · 2025-07-25 03:05:54 +00:00 · 2025-07-24 09:56:41 +00:00
708 changed files with 49658 additions and 21476 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -12,3 +12,6 @@ fetch = true
 checkout = true
 list_files = true
 internal_use_git2 = false
+
+[env]
+CARGO_WORKSPACE_DIR = { value = "", relative = true }
--- a/.github/labeler.yaml
+++ b/.github/labeler.yaml
@@ -0,0 +1,15 @@
+ci:
+  - changed-files:
+      - any-glob-to-any-file: .github/**
+
+docker:
+  - changed-files:
+      - any-glob-to-any-file: docker/**
+
+documentation:
+  - changed-files:
+      - any-glob-to-any-file: docs/**
+
+dashboard:
+  - changed-files:
+      - any-glob-to-any-file: grafana/**
--- a/.github/scripts/check-version.sh
+++ b/.github/scripts/check-version.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Get current version
+CURRENT_VERSION=$1
+if [ -z "$CURRENT_VERSION" ]; then
+  echo "Error: Failed to get current version"
+  exit 1
+fi
+
+# Get the latest version from GitHub Releases
+API_RESPONSE=$(curl -s "https://api.github.com/repos/GreptimeTeam/greptimedb/releases/latest")
+
+if [ -z "$API_RESPONSE" ] || [ "$(echo "$API_RESPONSE" | jq -r '.message')" = "Not Found" ]; then
+  echo "Error: Failed to fetch latest version from GitHub"
+  exit 1
+fi
+
+# Get the latest version
+LATEST_VERSION=$(echo "$API_RESPONSE" | jq -r '.tag_name')
+
+if [ -z "$LATEST_VERSION" ] || [ "$LATEST_VERSION" = "null" ]; then
+  echo "Error: No valid version found in GitHub releases"
+  exit 1
+fi
+
+# Clean up the version number format (strip a possible 'v' prefix and any '-nightly-*' suffix)
+CLEAN_CURRENT=$(echo "$CURRENT_VERSION" | sed 's/^v//' | sed 's/-nightly-.*//')
+CLEAN_LATEST=$(echo "$LATEST_VERSION" | sed 's/^v//' | sed 's/-nightly-.*//')
+
+echo "Current version: $CLEAN_CURRENT"
+echo "Latest release version: $CLEAN_LATEST"
+
+# Use sort -V to compare versions
+HIGHER_VERSION=$(printf "%s\n%s" "$CLEAN_CURRENT" "$CLEAN_LATEST" | sort -V | tail -n1)
+
+if [ "$HIGHER_VERSION" = "$CLEAN_CURRENT" ]; then
+  echo "Current version ($CLEAN_CURRENT) is NEWER than or EQUAL to latest ($CLEAN_LATEST)"
+  echo "should-push-latest-tag=true" >> $GITHUB_OUTPUT
+else
+  echo "Current version ($CLEAN_CURRENT) is OLDER than latest ($CLEAN_LATEST)"
+  echo "should-push-latest-tag=false" >> $GITHUB_OUTPUT
+fi
--- a/.github/workflows/pr-labeling.yaml
+++ b/.github/workflows/pr-labeling.yaml
@@ -0,0 +1,42 @@
+name: 'PR Labeling'
+
+on:
+  pull_request_target:
+    types:
+      - opened
+      - synchronize
+      - reopened
+
+permissions:
+  contents: read
+  pull-requests: write
+  issues: write
+
+jobs:
+  labeler:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout sources
+        uses: actions/checkout@v4
+
+      - uses: actions/labeler@v5
+        with:
+          configuration-path: ".github/labeler.yaml"
+          repo-token: "${{ secrets.GITHUB_TOKEN }}"
+
+  size-label:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: pascalgn/size-label-action@v0.5.5
+        env:
+          GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
+        with:
+          sizes: >
+            {
+              "0": "XS",
+              "100": "S",
+              "300": "M",
+              "1000": "L",
+              "1500": "XL",
+              "2000": "XXL"
+            }
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -110,6 +110,8 @@ jobs:

      # The 'version' is used as the global tag name of the release workflow.
      version: ${{ steps.create-version.outputs.version }}
+
+      should-push-latest-tag: ${{ steps.check-version.outputs.should-push-latest-tag }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
@@ -135,6 +137,11 @@ jobs:
          GITHUB_REF_NAME: ${{ github.ref_name }}
          NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }}

+      - name: Check version
+        id: check-version
+        run: |
+          ./.github/scripts/check-version.sh "${{ steps.create-version.outputs.version }}"
+
      - name: Allocate linux-amd64 runner
        if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
        uses: ./.github/actions/start-runner
@@ -314,7 +321,7 @@ jobs:
          image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
          image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
          version: ${{ needs.allocate-runners.outputs.version }}
-          push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
+          push-latest-tag: ${{ needs.allocate-runners.outputs.should-push-latest-tag == 'true' && github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}

      - name: Set build image result
        id: set-build-image-result
@@ -332,7 +339,7 @@ jobs:
      build-windows-artifacts,
      release-images-to-dockerhub,
    ]
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-latest-16-cores
    # When we push to ACR, it's easy to fail due to some unknown network issues.
    # However, we don't want to fail the whole workflow because of this.
    # The ACR has a daily sync with DockerHub, so don't worry about the image not being updated.
@@ -361,7 +368,7 @@ jobs:
          dev-mode: false
          upload-to-s3: true
          update-version-info: true
-          push-latest-tag: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
+          push-latest-tag: ${{ needs.allocate-runners.outputs.should-push-latest-tag == 'true' && github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}

  publish-github-release:
    name: Create GitHub release and upload artifacts
--- a/.github/workflows/semantic-pull-request.yml
+++ b/.github/workflows/semantic-pull-request.yml
@@ -11,17 +11,17 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

+permissions:
+  issues: write
+  contents: write
+  pull-requests: write
+
 jobs:
  check:
    runs-on: ubuntu-latest
-    permissions:
-      pull-requests: write  # Add permissions to modify PRs
-      issues: write
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v4
-        with:
-          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
      - name: Check Pull Request
        working-directory: cyborg
--- a/.gitignore
+++ b/.gitignore
@@ -60,4 +60,7 @@ tests-fuzz/corpus/
 greptimedb_data

 # github
-!/.github
+!/.github
+
+# Claude code
+CLAUDE.md
--- a/AUTHOR.md
+++ b/AUTHOR.md
@@ -10,12 +10,10 @@
 * [NiwakaDev](https://github.com/NiwakaDev)
 * [tisonkun](https://github.com/tisonkun)

-
 ## Team Members (in alphabetical order)

 * [apdong2022](https://github.com/apdong2022)
 * [beryl678](https://github.com/beryl678)
-* [Breeze-P](https://github.com/Breeze-P)
 * [daviderli614](https://github.com/daviderli614)
 * [discord9](https://github.com/discord9)
 * [evenyag](https://github.com/evenyag)
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,6 +13,7 @@ members = [
    "src/common/datasource",
    "src/common/decimal",
    "src/common/error",
+    "src/common/event-recorder",
    "src/common/frontend",
    "src/common/function",
    "src/common/greptimedb-telemetry",
@@ -30,6 +31,7 @@ members = [
    "src/common/recordbatch",
    "src/common/runtime",
    "src/common/session",
+    "src/common/sql",
    "src/common/stat",
    "src/common/substrait",
    "src/common/telemetry",
@@ -71,11 +73,13 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "0.15.0"
+version = "0.16.0"
 edition = "2021"
 license = "Apache-2.0"

 [workspace.lints]
+clippy.print_stdout = "warn"
+clippy.print_stderr = "warn"
 clippy.dbg_macro = "warn"
 clippy.implicit_clone = "warn"
 clippy.result_large_err = "allow"
@@ -121,6 +125,7 @@ datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "
 datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
 datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
 datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
+datafusion-functions-aggregate-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
 datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
 datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
 datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
@@ -130,11 +135,12 @@ deadpool = "0.12"
 deadpool-postgres = "0.14"
 derive_builder = "0.20"
 dotenv = "0.15"
+either = "1.15"
 etcd-client = "0.14"
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "f0913f179ee1d2ce428f8b85a9ea12b5f69ad636" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "7fcaa3e413947a7a28d9af95812af26c1939ce78" }
 hex = "0.4"
 http = "1"
 humantime = "2.1"
@@ -162,14 +168,13 @@ opentelemetry-proto = { version = "0.27", features = [
    "with-serde",
    "logs",
 ] }
+ordered-float = { version = "4.3", features = ["serde"] }
 parking_lot = "0.12"
 parquet = { version = "54.2", default-features = false, features = ["arrow", "async", "object_store"] }
 paste = "1.0"
 pin-project = "1.0"
 prometheus = { version = "0.13.3", features = ["process"] }
-promql-parser = { git = "https://github.com/GreptimeTeam/promql-parser.git", rev = "0410e8b459dda7cb222ce9596f8bf3971bd07bd2", features = [
-    "ser",
-] }
+promql-parser = { version = "0.6", features = ["ser"] }
 prost = { version = "0.13", features = ["no-recursion-limit"] }
 raft-engine = { version = "0.4.1", default-features = false }
 rand = "0.9"
@@ -220,10 +225,13 @@ tokio-util = { version = "0.7", features = ["io-util", "compat"] }
 toml = "0.8.8"
 tonic = { version = "0.12", features = ["tls", "gzip", "zstd"] }
 tower = "0.5"
+tower-http = "0.6"
+tracing = "0.1"
 tracing-appender = "0.2"
 tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
 typetag = "0.2"
 uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
+vrl = "0.25"
 zstd = "0.13"
 # DO_NOT_REMOVE_THIS: END_OF_EXTERNAL_DEPENDENCIES

@@ -241,6 +249,7 @@ common-config = { path = "src/common/config" }
 common-datasource = { path = "src/common/datasource" }
 common-decimal = { path = "src/common/decimal" }
 common-error = { path = "src/common/error" }
+common-event-recorder = { path = "src/common/event-recorder" }
 common-frontend = { path = "src/common/frontend" }
 common-function = { path = "src/common/function" }
 common-greptimedb-telemetry = { path = "src/common/greptimedb-telemetry" }
@@ -258,6 +267,7 @@ common-query = { path = "src/common/query" }
 common-recordbatch = { path = "src/common/recordbatch" }
 common-runtime = { path = "src/common/runtime" }
 common-session = { path = "src/common/session" }
+common-sql = { path = "src/common/sql" }
 common-telemetry = { path = "src/common/telemetry" }
 common-test-util = { path = "src/common/test-util" }
 common-time = { path = "src/common/time" }
--- a/README.md
+++ b/README.md
@@ -75,9 +75,9 @@
 | --------- | ----------- |
 | [Unified Observability Data](https://docs.greptime.com/user-guide/concepts/why-greptimedb) | Store metrics, logs, and traces as timestamped, contextual wide events. Query via [SQL](https://docs.greptime.com/user-guide/query-data/sql), [PromQL](https://docs.greptime.com/user-guide/query-data/promql), and [streaming](https://docs.greptime.com/user-guide/flow-computation/overview). |
 | [High Performance & Cost Effective](https://docs.greptime.com/user-guide/manage-data/data-index) | Written in Rust, with a distributed query engine, [rich indexing](https://docs.greptime.com/user-guide/manage-data/data-index), and optimized columnar storage, delivering sub-second responses at PB scale. |
-| [Cloud-Native Architecture](https://docs.greptime.com/user-guide/concepts/architecture) | Designed for [Kubernetes](https://docs.greptime.com/user-guide/deployments/deploy-on-kubernetes/greptimedb-operator-management), with compute/storage separation, native object storage (AWS S3, Azure Blob, etc.) and seamless cross-cloud access. |
+| [Cloud-Native Architecture](https://docs.greptime.com/user-guide/concepts/architecture) | Designed for [Kubernetes](https://docs.greptime.com/user-guide/deployments-administration/deploy-on-kubernetes/greptimedb-operator-management), with compute/storage separation, native object storage (AWS S3, Azure Blob, etc.) and seamless cross-cloud access. |
 | [Developer-Friendly](https://docs.greptime.com/user-guide/protocols/overview) | Access via SQL/PromQL interfaces, REST API, MySQL/PostgreSQL protocols, and popular ingestion [protocols](https://docs.greptime.com/user-guide/protocols/overview). |
-| [Flexible Deployment](https://docs.greptime.com/user-guide/deployments/overview) | Deploy anywhere: edge (including ARM/[Android](https://docs.greptime.com/user-guide/deployments/run-on-android)) or cloud, with unified APIs and efficient data sync. |
+| [Flexible Deployment](https://docs.greptime.com/user-guide/deployments-administration/overview) | Deploy anywhere: edge (including ARM/[Android](https://docs.greptime.com/user-guide/deployments-administration/run-on-android)) or cloud, with unified APIs and efficient data sync. |

 Learn more in [Why GreptimeDB](https://docs.greptime.com/user-guide/concepts/why-greptimedb) and [Observability 2.0 and the Database for It](https://greptime.com/blogs/2025-04-25-greptimedb-observability2-new-database).

@@ -189,7 +189,8 @@ We invite you to engage and contribute!
 - [Official Website](https://greptime.com/)
 - [Blog](https://greptime.com/blogs/)
 - [LinkedIn](https://www.linkedin.com/company/greptime/)
- [Twitter](https://twitter.com/greptime)
+- [X (Twitter)](https://X.com/greptime)
+- [YouTube](https://www.youtube.com/@greptime)

 ## License

--- a/config/config.md
+++ b/config/config.md
@@ -123,6 +123,7 @@
 | `storage.http_client.connect_timeout` | String | `30s` | The timeout for only the connect phase of a http client. |
 | `storage.http_client.timeout` | String | `30s` | The total request timeout, applied from when the request starts connecting until the response body has finished.<br/>Also considered a total deadline. |
 | `storage.http_client.pool_idle_timeout` | String | `90s` | The timeout for idle sockets being kept-alive. |
+| `storage.http_client.skip_ssl_validation` | Bool | `false` | Whether to skip SSL certificate verification.<br/>**Security Notice**: Setting `skip_ssl_validation = true` disables certificate verification, making connections vulnerable to man-in-the-middle attacks. Only use this in development or trusted private networks. |
 | `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
 | `region_engine.mito` | -- | -- | The Mito engine options. |
 | `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
@@ -184,10 +185,11 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
+| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `slow_query` | -- | -- | The slow query log options. |
@@ -287,10 +289,11 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
+| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `slow_query` | -- | -- | The slow query log options. |
@@ -322,6 +325,7 @@
 | `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
 | `use_memory_store` | Bool | `false` | Store data in memory. |
 | `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
+| `region_failure_detector_initialization_delay` | String | `10m` | The delay before starting region failure detection.<br/>This delay helps prevent Metasrv from triggering unnecessary region failovers before all Datanodes are fully started.<br/>Especially useful when the cluster is not deployed with GreptimeDB Operator and maintenance mode is not enabled. |
 | `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.<br/>**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
 | `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
 | `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
@@ -369,10 +373,11 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
+| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `export_metrics` | -- | -- | The metasrv can export its metrics and send to Prometheus compatible service (e.g. `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
@@ -431,8 +436,8 @@
 | `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
 | `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.file_size` | String | `128MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
-| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
-| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a purge.<br/>**It's only used when the provider is `raft_engine`**. |
+| `wal.purge_interval` | String | `1m` | The interval to trigger a purge.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
@@ -471,6 +476,7 @@
 | `storage.http_client.connect_timeout` | String | `30s` | The timeout for only the connect phase of a http client. |
 | `storage.http_client.timeout` | String | `30s` | The total request timeout, applied from when the request starts connecting until the response body has finished.<br/>Also considered a total deadline. |
 | `storage.http_client.pool_idle_timeout` | String | `90s` | The timeout for idle sockets being kept-alive. |
+| `storage.http_client.skip_ssl_validation` | Bool | `false` | Whether to skip SSL certificate verification.<br/>**Security Notice**: Setting `skip_ssl_validation = true` disables certificate verification, making connections vulnerable to man-in-the-middle attacks. Only use this in development or trusted private networks. |
 | `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
 | `region_engine.mito` | -- | -- | The Mito engine options. |
 | `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
@@ -532,10 +538,11 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
+| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
@@ -555,6 +562,16 @@
 | `node_id` | Integer | Unset | The flownode identifier and should be unique in the cluster. |
 | `flow` | -- | -- | flow engine options. |
 | `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. |
+| `flow.batching_mode` | -- | -- | -- |
+| `flow.batching_mode.query_timeout` | String | `600s` | The default batching engine query timeout is 10 minutes. |
+| `flow.batching_mode.slow_query_threshold` | String | `60s` | Will output a warn log for any query that runs for more than this threshold |
+| `flow.batching_mode.experimental_min_refresh_duration` | String | `5s` | The minimum duration between two queries execution by batching mode task |
+| `flow.batching_mode.grpc_conn_timeout` | String | `5s` | The gRPC connection timeout |
+| `flow.batching_mode.experimental_grpc_max_retries` | Integer | `3` | The gRPC max retry number |
+| `flow.batching_mode.experimental_frontend_scan_timeout` | String | `30s` | Timeout for the flownode to wait for an available frontend.<br/>If no available frontend is found after frontend_scan_timeout has elapsed, an error is returned,<br/>which prevents the flownode from starting |
+| `flow.batching_mode.experimental_frontend_activity_timeout` | String | `60s` | Frontend activity timeout.<br/>If a frontend is down (not sending heartbeats) for more than frontend_activity_timeout,<br/>it will be removed from the list that the flownode uses to connect |
+| `flow.batching_mode.experimental_max_filter_num_per_query` | Integer | `20` | Maximum number of filters allowed in a single query |
+| `flow.batching_mode.experimental_time_window_merge_threshold` | Integer | `3` | Time window merge distance |
 | `grpc` | -- | -- | The gRPC server options. |
 | `grpc.bind_addr` | String | `127.0.0.1:6800` | The address to bind the gRPC server. |
 | `grpc.server_addr` | String | `127.0.0.1:6800` | The address advertised to the metasrv,<br/>and used for connections from outside the host |
@@ -582,11 +599,14 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
+| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
 | `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
+| `query` | -- | -- | -- |
+| `query.parallelism` | Integer | `1` | Parallelism of the query engine for queries sent by flownode.<br/>Defaults to 1, so it won't use too much CPU or memory |
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -129,11 +129,11 @@ dir = "./greptimedb_data/wal"
 ## **It's only used when the provider is `raft_engine`**.
 file_size = "128MB"

-## The threshold of the WAL size to trigger a flush.
+## The threshold of the WAL size to trigger a purge.
 ## **It's only used when the provider is `raft_engine`**.
 purge_threshold = "1GB"

-## The interval to trigger a flush.
+## The interval to trigger a purge.
 ## **It's only used when the provider is `raft_engine`**.
 purge_interval = "1m"

@@ -367,6 +367,10 @@ timeout = "30s"
 ## The timeout for idle sockets being kept-alive.
 pool_idle_timeout = "90s"

+## Whether to skip SSL certificate verification.
+## **Security Notice**: Setting `skip_ssl_validation = true` disables certificate verification, making connections vulnerable to man-in-the-middle attacks. Only use this in development or trusted private networks.
+skip_ssl_validation = false
+
 # Custom storage options
 # [[storage.providers]]
 # name = "S3"
@@ -625,7 +629,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4317"
+otlp_endpoint = "http://localhost:4318"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -636,6 +640,9 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

+## The OTLP tracing export protocol. Can be `grpc`/`http`.
+otlp_export_protocol = "http"
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
--- a/config/flownode.example.toml
+++ b/config/flownode.example.toml
@@ -7,6 +7,29 @@ node_id = 14
 ## The number of flow worker in flownode.
 ## Not setting(or set to 0) this value will use the number of CPU cores divided by 2.
 #+num_workers=0
+[flow.batching_mode]
+## The default batching engine query timeout is 10 minutes.
+#+query_timeout="600s"
+## will output a warn log for any query that runs for more than this threshold
+#+slow_query_threshold="60s"
+## The minimum duration between two queries execution by batching mode task
+#+experimental_min_refresh_duration="5s"
+## The gRPC connection timeout
+#+grpc_conn_timeout="5s"
+## The gRPC max retry number
+#+experimental_grpc_max_retries=3
+## Timeout for the flownode to wait for an available frontend.
+## If no available frontend is found after frontend_scan_timeout has elapsed, an error is returned,
+## which prevents the flownode from starting.
+#+experimental_frontend_scan_timeout="30s"
+## Frontend activity timeout.
+## If a frontend is down (not sending heartbeats) for more than frontend_activity_timeout,
+## it will be removed from the list that the flownode uses to connect.
+#+experimental_frontend_activity_timeout="60s"
+## Maximum number of filters allowed in a single query
+#+experimental_max_filter_num_per_query=20
+## Time window merge distance
+#+experimental_time_window_merge_threshold=3

 ## The gRPC server options.
 [grpc]
@@ -83,7 +106,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4317"
+otlp_endpoint = "http://localhost:4318"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -94,6 +117,9 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

+## The OTLP tracing export protocol. Can be `grpc`/`http`.
+otlp_export_protocol = "http"
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
@@ -105,3 +131,8 @@ default_ratio = 1.0
 ## The tokio console address.
 ## @toml2docs:none-default
 #+ tokio_console_addr = "127.0.0.1"
+
+[query]
+## Parallelism of the query engine for query sent by flownode.
+## Default to 1, so it won't use too much cpu or memory
+parallelism = 1
--- a/config/frontend.example.toml
+++ b/config/frontend.example.toml
@@ -218,7 +218,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4317"
+otlp_endpoint = "http://localhost:4318"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -229,6 +229,9 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

+## The OTLP tracing export protocol. Can be `grpc`/`http`.
+otlp_export_protocol = "http"
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
--- a/config/metasrv.example.toml
+++ b/config/metasrv.example.toml
@@ -43,6 +43,11 @@ use_memory_store = false
 ## - Using shared storage (e.g., s3).
 enable_region_failover = false

+## The delay before starting region failure detection.
+## This delay helps prevent Metasrv from triggering unnecessary region failovers before all Datanodes are fully started.
+## Especially useful when the cluster is not deployed with GreptimeDB Operator and maintenance mode is not enabled.
+region_failure_detector_initialization_delay = '10m'
+
 ## Whether to allow region failover on local WAL.
 ## **This option is not recommended to be set to true, because it may lead to data loss during failover.**
 allow_region_failover_on_local_wal = false
@@ -220,7 +225,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4317"
+otlp_endpoint = "http://localhost:4318"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -231,6 +236,9 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

+## The OTLP tracing export protocol. Can be `grpc`/`http`.
+otlp_export_protocol = "http"
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -458,6 +458,10 @@ timeout = "30s"
 ## The timeout for idle sockets being kept-alive.
 pool_idle_timeout = "90s"

+## To skip the ssl verification
+## **Security Notice**: Setting `skip_ssl_validation = true` disables certificate verification, making connections vulnerable to man-in-the-middle attacks. Only use this in development or trusted private networks.
+skip_ssl_validation = false
+
 # Custom storage options
 # [[storage.providers]]
 # name = "S3"
@@ -716,7 +720,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4317"
+otlp_endpoint = "http://localhost:4318"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -727,6 +731,9 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

+## The OTLP tracing export protocol. Can be `grpc`/`http`.
+otlp_export_protocol = "http"
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
--- a/cyborg/bin/follow-up-docs-issue.ts
+++ b/cyborg/bin/follow-up-docs-issue.ts
@@ -55,12 +55,25 @@ async function main() {
        await client.rest.issues.addLabels({
            owner, repo, issue_number: number, labels: [labelDocsRequired],
        })
+
+        // Get available assignees for the docs repo
+        const assigneesResponse = await docsClient.rest.issues.listAssignees({
+            owner: 'GreptimeTeam',
+            repo: 'docs',
+        })
+        const validAssignees = assigneesResponse.data.map(assignee => assignee.login)
+        core.info(`Available assignees: ${validAssignees.join(', ')}`)
+
+        // Check if the actor is a valid assignee, otherwise fallback to fengjiachun
+        const assignee = validAssignees.includes(actor) ? actor : 'fengjiachun'
+        core.info(`Assigning issue to: ${assignee}`)
+
        await docsClient.rest.issues.create({
            owner: 'GreptimeTeam',
            repo: 'docs',
            title: `Update docs for ${title}`,
            body: `A document change request is generated from ${html_url}`,
-            assignee: actor,
+            assignee: assignee,
        }).then((res) => {
            core.info(`Created issue ${res.data}`)
        })
--- a/docs/benchmarks/log/README.md
+++ b/docs/benchmarks/log/README.md
@@ -48,4 +48,4 @@ Please refer to [SQL query](./query.sql) for GreptimeDB and Clickhouse, and [que

 ## Addition
 - You can tune GreptimeDB's configuration to get better performance.
- You can setup GreptimeDB to use S3 as storage, see [here](https://docs.greptime.com/user-guide/deployments/configuration#storage-options).
+- You can setup GreptimeDB to use S3 as storage, see [here](https://docs.greptime.com/user-guide/deployments-administration/configuration#storage-options).
--- a/docs/how-to/how-to-write-aggregate-function.md
+++ b/docs/how-to/how-to-write-aggregate-function.md
@@ -1,4 +1,4 @@
-Currently, our query engine is based on DataFusion, so all aggregate function is executed by DataFusion, through its UDAF interface. You can find DataFusion's UDAF example [here](https://github.com/apache/arrow-datafusion/blob/arrow2/datafusion-examples/examples/simple_udaf.rs). Basically, we provide the same way as DataFusion to write aggregate functions: both are centered in a struct called "Accumulator" to accumulates states along the way in aggregation.
+Currently, our query engine is based on DataFusion, so all aggregate function is executed by DataFusion, through its UDAF interface. You can find DataFusion's UDAF example [here](https://github.com/apache/datafusion/tree/main/datafusion-examples/examples/simple_udaf.rs). Basically, we provide the same way as DataFusion to write aggregate functions: both are centered in a struct called "Accumulator" to accumulates states along the way in aggregation.

 However, DataFusion's UDAF implementation has a huge restriction, that it requires user to provide a concrete "Accumulator". Take `Median` aggregate function for example, to aggregate a `u32` datatype column, you have to write a `MedianU32`, and use `SELECT MEDIANU32(x)` in SQL. `MedianU32` cannot be used to aggregate a `i32` datatype column. Or, there's another way: you can use a special type that can hold all kinds of data (like our `Value` enum or Arrow's `ScalarValue`), and `match` all the way up to do aggregate calculations. It might work, though rather tedious. (But I think it's DataFusion's preferred way to write UDAF.)

--- a/docs/rfcs/2023-02-01-table-compaction.md
+++ b/docs/rfcs/2023-02-01-table-compaction.md
@@ -76,7 +76,7 @@ pub trait CompactionStrategy {
 ```

 The most suitable compaction strategy for time-series scenario would be 
-a hybrid strategy that combines time window compaction with size-tired compaction, just like [Cassandra](https://cassandra.apache.org/doc/latest/cassandra/operating/compaction/twcs.html) and [ScyllaDB](https://docs.scylladb.com/stable/architecture/compaction/compaction-strategies.html#time-window-compaction-strategy-twcs) does.
+a hybrid strategy that combines time window compaction with size-tiered compaction, just like [Cassandra](https://cassandra.apache.org/doc/latest/cassandra/managing/operating/compaction/twcs.html) and [ScyllaDB](https://docs.scylladb.com/stable/architecture/compaction/compaction-strategies.html#time-window-compaction-strategy-twcs) do.

 We can first group SSTs in level n into buckets according to some predefined time window. Within that window, 
 SSTs are compacted in a size-tired manner (find SSTs with similar size and compact them to level n+1). 
--- a/docs/rfcs/2024-01-17-dataflow-framework.md
+++ b/docs/rfcs/2024-01-17-dataflow-framework.md
@@ -28,7 +28,7 @@ In order to do those things while maintaining a low memory footprint, you need t
 - Greptime Flow's is built on top of [Hydroflow](https://github.com/hydro-project/hydroflow).
 - We have three choices for the Dataflow/Streaming process framework for our simple continuous aggregation feature:
 1. Based on the timely/differential dataflow crate that [materialize](https://github.com/MaterializeInc/materialize) based on. Later, it's proved too obscure for a simple usage, and is hard to customize memory usage control.
-2. Based on a simple dataflow framework that we write from ground up, like what [arroyo](https://www.arroyo.dev/) or [risingwave](https://www.risingwave.dev/) did, for example the core streaming logic of [arroyo](https://github.com/ArroyoSystems/arroyo/blob/master/arroyo-datastream/src/lib.rs) only takes up to 2000 line of codes. However, it means maintaining another layer of dataflow framework, which might seem easy in the beginning, but I fear it might be too burdensome to maintain once we need more features.
+2. Based on a simple dataflow framework that we write from ground up, like what [arroyo](https://www.arroyo.dev/) or [risingwave](https://www.risingwave.dev/) did, for example the core streaming logic of [arroyo](https://github.com/ArroyoSystems/arroyo/blob/master/crates/arroyo-datastream/src/lib.rs) only takes up to 2000 line of codes. However, it means maintaining another layer of dataflow framework, which might seem easy in the beginning, but I fear it might be too burdensome to maintain once we need more features.
 3. Based on a simple and lower level dataflow framework that someone else write, like [hydroflow](https://github.com/hydro-project/hydroflow), this approach combines the best of both worlds. Firstly, it boasts ease of comprehension and customization. Secondly, the dataflow framework offers precisely the necessary features for crafting uncomplicated single-node dataflow programs while delivering decent performance.

 Hence, we choose the third option, and use a simple logical plan that's anagonistic to the underlying dataflow framework, as it only describe how the dataflow graph should be doing, not how it do that. And we built operator in hydroflow to execute the plan. And the result hydroflow graph is wrapped in a engine that only support data in/out and tick event to flush and compute the result. This provide a thin middle layer that's easy to maintain and allow switching to other dataflow framework if necessary.
--- a/grafana/README.md
+++ b/grafana/README.md
@@ -83,7 +83,7 @@ If you use the [Helm Chart](https://github.com/GreptimeTeam/helm-charts) to depl
 - `monitoring.enabled=true`: Deploys a standalone GreptimeDB instance dedicated to monitoring the cluster;
 - `grafana.enabled=true`: Deploys Grafana and automatically imports the monitoring dashboard;

-The standalone GreptimeDB instance will collect metrics from your cluster, and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/user-guide/deployments-administration/deploy-on-kubernetes/getting-started).
+The standalone GreptimeDB instance will collect metrics from your cluster, and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/user-guide/deployments-administration/deploy-on-kubernetes/overview).

 ### Self-host Prometheus and import dashboards manually

--- a/grafana/dashboards/metrics/cluster/dashboard.json
+++ b/grafana/dashboards/metrics/cluster/dashboard.json
--- a/grafana/dashboards/metrics/cluster/dashboard.md
+++ b/grafana/dashboards/metrics/cluster/dashboard.md
@@ -70,19 +70,20 @@
 | Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
 | Compaction Input/Output Bytes | `sum by(instance, pod) (greptime_mito_compaction_input_bytes)`<br/>`sum by(instance, pod) (greptime_mito_compaction_output_bytes)` | `timeseries` | Compaction oinput output bytes | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-input` |
 | Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`<br/>`sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
+| Active Series and Field Builders Count | `sum by(instance, pod) (greptime_mito_memtable_active_series_count)`<br/>`sum by(instance, pod) (greptime_mito_memtable_field_builder_count)` | `timeseries` | Number of active series and field builders in memtables. | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]-series` |
 | Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
 # OpenDAL
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
 | QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
-| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
-| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
-| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
-| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
+| Read QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation=~"read\|Reader::read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation=~"read\|Reader::read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Write QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation=~"write\|Writer::write\|Writer::close"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation =~ "Writer::write\|Writer::close\|write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
 | List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
 | List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
 | Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode",operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
-| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read\|write\|list\|Writer::write\|Writer::close\|Reader::read"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
 | Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
 | OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode", error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
 # Metasrv
--- a/grafana/dashboards/metrics/cluster/dashboard.yaml
+++ b/grafana/dashboards/metrics/cluster/dashboard.yaml
@@ -612,6 +612,21 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
+        - title: Active Series and Field Builders Count
+          type: timeseries
+          description: Number of active series and field builders in memtables.
+          unit: none
+          queries:
+            - expr: sum by(instance, pod) (greptime_mito_memtable_active_series_count)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-series'
+            - expr: sum by(instance, pod) (greptime_mito_memtable_field_builder_count)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-field_builders'
        - title: Region Worker Convert Requests
          type: timeseries
          description: Per-stage elapsed time for region worker to decode requests.
@@ -644,41 +659,41 @@ groups:
          description: Read QPS per Instance.
          unit: ops
          queries:
-            - expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))
+            - expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation=~"read|Reader::read"}[$__rate_interval]))
              datasource:
                type: prometheus
                uid: ${metrics}
-              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
        - title: Read P99 per Instance
          type: timeseries
          description: Read P99 per Instance.
          unit: s
          queries:
-            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation=~"read|Reader::read"}[$__rate_interval])))
              datasource:
                type: prometheus
                uid: ${metrics}
-              legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
        - title: Write QPS per Instance
          type: timeseries
          description: Write QPS per Instance.
          unit: ops
          queries:
-            - expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))
+            - expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation=~"write|Writer::write|Writer::close"}[$__rate_interval]))
              datasource:
                type: prometheus
                uid: ${metrics}
-              legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
        - title: Write P99 per Instance
          type: timeseries
          description: Write P99 per Instance.
          unit: s
          queries:
-            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation =~ "Writer::write|Writer::close|write"}[$__rate_interval])))
              datasource:
                type: prometheus
                uid: ${metrics}
-              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
        - title: List QPS per Instance
          type: timeseries
          description: List QPS per Instance.
@@ -714,7 +729,7 @@ groups:
          description: Other Request P99 per Instance.
          unit: s
          queries:
-            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read|write|list"}[$__rate_interval])))
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read|write|list|Writer::write|Writer::close|Reader::read"}[$__rate_interval])))
              datasource:
                type: prometheus
                uid: ${metrics}
--- a/grafana/dashboards/metrics/standalone/dashboard.json
+++ b/grafana/dashboards/metrics/standalone/dashboard.json
--- a/grafana/dashboards/metrics/standalone/dashboard.md
+++ b/grafana/dashboards/metrics/standalone/dashboard.md
@@ -70,19 +70,20 @@
 | Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
 | Compaction Input/Output Bytes | `sum by(instance, pod) (greptime_mito_compaction_input_bytes)`<br/>`sum by(instance, pod) (greptime_mito_compaction_output_bytes)` | `timeseries` | Compaction oinput output bytes | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-input` |
 | Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`<br/>`sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
+| Active Series and Field Builders Count | `sum by(instance, pod) (greptime_mito_memtable_active_series_count)`<br/>`sum by(instance, pod) (greptime_mito_memtable_field_builder_count)` | `timeseries` | Number of active series and field builders in memtables. | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]-series` |
 | Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
 # OpenDAL
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
 | QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
-| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
-| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
-| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
-| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
+| Read QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{ operation=~"read\|Reader::read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{operation=~"read\|Reader::read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Write QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{ operation=~"write\|Writer::write\|Writer::close"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation =~ "Writer::write\|Writer::close\|write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
 | List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
 | List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
 | Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
-| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
+| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read\|write\|list\|Writer::write\|Writer::close\|Reader::read"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
 | Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
 | OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{ error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
 # Metasrv
--- a/grafana/dashboards/metrics/standalone/dashboard.yaml
+++ b/grafana/dashboards/metrics/standalone/dashboard.yaml
@@ -612,6 +612,21 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
+        - title: Active Series and Field Builders Count
+          type: timeseries
+          description: Active series and field builders count in memtables.
+          unit: none
+          queries:
+            - expr: sum by(instance, pod) (greptime_mito_memtable_active_series_count)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-series'
+            - expr: sum by(instance, pod) (greptime_mito_memtable_field_builder_count)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-field_builders'
        - title: Region Worker Convert Requests
          type: timeseries
          description: Per-stage elapsed time for region worker to decode requests.
@@ -644,41 +659,41 @@ groups:
          description: Read QPS per Instance.
          unit: ops
          queries:
-            - expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))
+            - expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{ operation=~"read|Reader::read"}[$__rate_interval]))
              datasource:
                type: prometheus
                uid: ${metrics}
-              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
        - title: Read P99 per Instance
          type: timeseries
          description: Read P99 per Instance.
          unit: s
          queries:
-            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{operation=~"read|Reader::read"}[$__rate_interval])))
              datasource:
                type: prometheus
                uid: ${metrics}
-              legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
        - title: Write QPS per Instance
          type: timeseries
          description: Write QPS per Instance.
          unit: ops
          queries:
-            - expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))
+            - expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{ operation=~"write|Writer::write|Writer::close"}[$__rate_interval]))
              datasource:
                type: prometheus
                uid: ${metrics}
-              legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
        - title: Write P99 per Instance
          type: timeseries
          description: Write P99 per Instance.
          unit: s
          queries:
-            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation =~ "Writer::write|Writer::close|write"}[$__rate_interval])))
              datasource:
                type: prometheus
                uid: ${metrics}
-              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
        - title: List QPS per Instance
          type: timeseries
          description: List QPS per Instance.
@@ -714,7 +729,7 @@ groups:
          description: Other Request P99 per Instance.
          unit: s
          queries:
-            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read|write|list"}[$__rate_interval])))
+            - expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read|write|list|Writer::write|Writer::close|Reader::read"}[$__rate_interval])))
              datasource:
                type: prometheus
                uid: ${metrics}
--- a/licenserc.toml
+++ b/licenserc.toml
@@ -31,8 +31,10 @@ excludes = [
    "src/operator/src/expr_helper/trigger.rs",
    "src/sql/src/statements/create/trigger.rs",
    "src/sql/src/statements/show/trigger.rs",
+    "src/sql/src/statements/drop/trigger.rs",
    "src/sql/src/parsers/create_parser/trigger.rs",
    "src/sql/src/parsers/show_parser/trigger.rs",
+    "src/mito2/src/extension.rs",
 ]

 [properties]
--- a/scripts/check-super-imports.py
+++ b/scripts/check-super-imports.py
@@ -13,8 +13,8 @@
 # limitations under the License.

 import os
-import re
 from multiprocessing import Pool
+from pathlib import Path


 def find_rust_files(directory):
@@ -24,6 +24,10 @@ def find_rust_files(directory):
        if "test" in root.lower():
            continue

+        # Skip the target directory
+        if "target" in Path(root).parts:
+            continue
+
        for file in files:
            # Skip files with "test" in the filename
            if "test" in file.lower():
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -53,6 +53,54 @@ get_arch_type() {
  esac
 }

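+# Verify that file $1 has SHA256 digest $2: returns 0 on match (or, with a warning, when no hashing tool exists), 1 on mismatch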
+verify_sha256() {
+  file="$1"
+  expected_sha256="$2"
+
+  if command -v sha256sum >/dev/null 2>&1; then
+    actual_sha256=$(sha256sum "$file" | cut -d' ' -f1)
+  elif command -v shasum >/dev/null 2>&1; then
+    actual_sha256=$(shasum -a 256 "$file" | cut -d' ' -f1)
+  else
+    echo "Warning: No SHA256 verification tool found (sha256sum or shasum). Skipping checksum verification."
+    return 0
+  fi
+
+  if [ "$actual_sha256" = "$expected_sha256" ]; then
+    echo "SHA256 checksum verified successfully."
+    return 0
+  else
+    echo "Error: SHA256 checksum verification failed!"
+    echo "Expected: $expected_sha256"
+    echo "Actual: $actual_sha256"
+    return 1
+  fi
+}
+
+# Ask the yes/no question $1 on the terminal; returns 0 for y/yes (any case), 1 for anything else or no terminal
+prompt_confirmation() {
+  message="$1"
+  printf "%s (y/N): " "$message"
+
+  # Silence stderr BEFORE opening /dev/tty so a missing terminal fails quietly; -r keeps backslashes literal
+  answer=""
+  if read -r answer 2>/dev/null </dev/tty; then
+    case "$answer" in
+      [Yy]|[Yy][Ee][Ss])
+        return 0
+        ;;
+      *)
+        return 1
+        ;;
+    esac
+  else
+    echo ""
+    echo "Cannot read user input. Defaulting to No."
+    return 1
+  fi
+}
+
 download_artifact() {
  if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
    # Use the latest stable released version.
@@ -71,17 +119,104 @@ download_artifact() {
    fi

    echo "Downloading ${BIN}, OS: ${OS_TYPE}, Arch: ${ARCH_TYPE}, Version: ${VERSION}"
-    PACKAGE_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}.tar.gz"
+    PKG_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}"
+    PACKAGE_NAME="${PKG_NAME}.tar.gz"
+    SHA256_FILE="${PKG_NAME}.sha256sum"

    if [ -n "${PACKAGE_NAME}" ]; then
-      wget "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"
+      # Check if files already exist and prompt for override
+      if [ -f "${PACKAGE_NAME}" ]; then
+        echo "File ${PACKAGE_NAME} already exists."
+        if prompt_confirmation "Do you want to override it?"; then
+          echo "Overriding existing file..."
+          rm -f "${PACKAGE_NAME}"
+        else
+          echo "Skipping download. Using existing file."
+        fi
+      fi
+
+      if [ -f "${BIN}" ]; then
+        echo "Binary ${BIN} already exists."
+        if prompt_confirmation "Do you want to override it?"; then
+          echo "Will override existing binary..."
+          rm -f "${BIN}"
+        else
+          echo "Installation cancelled."
+          exit 0
+        fi
+      fi
+
+      # Download the package unless an existing copy was kept above
+      if [ ! -f "${PACKAGE_NAME}" ]; then
+        echo "Downloading ${PACKAGE_NAME}..."
+        # Prefer curl, fall back to wget; -f makes curl fail on HTTP errors (e.g. 404) instead of saving the error page
+        if command -v curl >/dev/null 2>&1; then
+          if ! curl -fL -o "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
+            echo "Error: Failed to download ${PACKAGE_NAME}"
+            exit 1
+          fi
+        elif command -v wget >/dev/null 2>&1; then
+          if ! wget -O "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
+            echo "Error: Failed to download ${PACKAGE_NAME}"
+            exit 1
+          fi
+        else
+          echo "Error: Neither curl nor wget is available for downloading."
+          exit 1
+        fi
+      fi
+
+      # Best-effort checksum check: fetch the .sha256sum asset (-f: an HTTP error counts as "not downloaded") and verify the package
+      echo "Downloading SHA256 checksum..."
+      sha256_download_success=0
+      if command -v curl >/dev/null 2>&1; then
+        if curl -fL -s -o "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
+          sha256_download_success=1
+        fi
+      elif command -v wget >/dev/null 2>&1; then
+        if wget -q -O "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
+          sha256_download_success=1
+        fi
+      fi
+
+      if [ $sha256_download_success -eq 1 ] && [ -f "${SHA256_FILE}" ]; then
+        expected_sha256=$(cut -d' ' -f1 "${SHA256_FILE}")
+        if [ -n "$expected_sha256" ]; then
+          if ! verify_sha256 "${PACKAGE_NAME}" "${expected_sha256}"; then
+            echo "SHA256 verification failed. Removing downloaded file."
+            rm -f "${PACKAGE_NAME}" "${SHA256_FILE}"
+            exit 1
+          fi
+        else
+          echo "Warning: Could not parse SHA256 checksum from file."
+        fi
+        rm -f "${SHA256_FILE}"
+      else
+        echo "Warning: Could not download SHA256 checksum file. Skipping verification."
+      fi

      # Extract the binary and clean the rest.
-      tar xvf "${PACKAGE_NAME}" && \
-      mv "${PACKAGE_NAME%.tar.gz}/${BIN}" "${PWD}" && \
-      rm -r "${PACKAGE_NAME}" && \
-      rm -r "${PACKAGE_NAME%.tar.gz}" && \
-      echo "Run './${BIN} --help' to get started"
+      echo "Extracting ${PACKAGE_NAME}..."
+      if ! tar xf "${PACKAGE_NAME}"; then
+        echo "Error: Failed to extract ${PACKAGE_NAME}"
+        exit 1
+      fi
+
+      # Find the binary in the extracted directory
+      extracted_dir="${PACKAGE_NAME%.tar.gz}"
+      if [ -f "${extracted_dir}/${BIN}" ]; then
+        mv "${extracted_dir}/${BIN}" "${PWD}/"
+        rm -f "${PACKAGE_NAME}"
+        rm -rf "${extracted_dir}"
+        chmod +x "${BIN}"
+        echo "Installation completed successfully!"
+        echo "Run './${BIN} --help' to get started"
+      else
+        echo "Error: Binary ${BIN} not found in extracted archive"
+        rm -f "${PACKAGE_NAME}"
+        rm -rf "${extracted_dir}"
+        exit 1
+      fi
    fi
  fi
 }
--- a/src/api/src/region.rs
+++ b/src/api/src/region.rs
@@ -22,6 +22,7 @@ use greptime_proto::v1::region::RegionResponse as RegionResponseV1;
 pub struct RegionResponse {
    pub affected_rows: AffectedRows,
    pub extensions: HashMap<String, Vec<u8>>,
+    pub metadata: Vec<u8>,
 }

 impl RegionResponse {
@@ -29,6 +30,7 @@ impl RegionResponse {
        Self {
            affected_rows: region_response.affected_rows as _,
            extensions: region_response.extensions,
+            metadata: region_response.metadata,
        }
    }

@@ -37,6 +39,16 @@ impl RegionResponse {
        Self {
            affected_rows,
            extensions: Default::default(),
+            metadata: Vec::new(),
+        }
+    }
+
+    /// Creates a response carrying only `metadata`, with zero affected rows and no extensions.
+    pub fn from_metadata(metadata: Vec<u8>) -> Self {
+        Self {
+            affected_rows: 0,
+            extensions: Default::default(),
+            metadata,
        }
    }
 }
--- a/src/api/src/v1/column_def.rs
+++ b/src/api/src/v1/column_def.rs
@@ -226,18 +226,20 @@ mod tests {
        assert!(options.is_none());

        let mut schema = ColumnSchema::new("test", ConcreteDataType::string_datatype(), true)
-            .with_fulltext_options(FulltextOptions {
-                enable: true,
-                analyzer: FulltextAnalyzer::English,
-                case_sensitive: false,
-                backend: FulltextBackend::Bloom,
-            })
+            .with_fulltext_options(FulltextOptions::new_unchecked(
+                true,
+                FulltextAnalyzer::English,
+                false,
+                FulltextBackend::Bloom,
+                10240,
+                0.01,
+            ))
            .unwrap();
        schema.set_inverted_index(true);
        let options = options_from_column_schema(&schema).unwrap();
        assert_eq!(
            options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
-            "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}"
+            "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":10240,\"false-positive-rate-in-10000\":100}"
        );
        assert_eq!(
            options.options.get(INVERTED_INDEX_GRPC_KEY).unwrap(),
@@ -247,16 +249,18 @@ mod tests {

    #[test]
    fn test_options_with_fulltext() {
-        let fulltext = FulltextOptions {
-            enable: true,
-            analyzer: FulltextAnalyzer::English,
-            case_sensitive: false,
-            backend: FulltextBackend::Bloom,
-        };
+        let fulltext = FulltextOptions::new_unchecked(
+            true,
+            FulltextAnalyzer::English,
+            false,
+            FulltextBackend::Bloom,
+            10240,
+            0.01,
+        );
        let options = options_from_fulltext(&fulltext).unwrap().unwrap();
        assert_eq!(
            options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
-            "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}"
+            "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":10240,\"false-positive-rate-in-10000\":100}"
        );
    }

--- a/src/catalog/Cargo.toml
+++ b/src/catalog/Cargo.toml
@@ -5,6 +5,7 @@ edition.workspace = true
 license.workspace = true

 [features]
+enterprise = []
 testing = []

 [lints]
--- a/src/catalog/src/kvbackend.rs
+++ b/src/catalog/src/kvbackend.rs
@@ -14,9 +14,11 @@

 pub use client::{CachedKvBackend, CachedKvBackendBuilder, MetaKvBackend};

+mod builder;
 mod client;
 mod manager;
 mod table_cache;

+pub use builder::KvBackendCatalogManagerBuilder;
 pub use manager::KvBackendCatalogManager;
 pub use table_cache::{new_table_cache, TableCache, TableCacheRef};
--- a/src/catalog/src/kvbackend/builder.rs
+++ b/src/catalog/src/kvbackend/builder.rs
@@ -0,0 +1,131 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use common_catalog::consts::DEFAULT_CATALOG_NAME;
+use common_meta::cache::LayeredCacheRegistryRef;
+use common_meta::key::flow::FlowMetadataManager;
+use common_meta::key::TableMetadataManager;
+use common_meta::kv_backend::KvBackendRef;
+use common_procedure::ProcedureManagerRef;
+use moka::sync::Cache;
+use partition::manager::PartitionRuleManager;
+
+#[cfg(feature = "enterprise")]
+use crate::information_schema::InformationSchemaTableFactoryRef;
+use crate::information_schema::{InformationExtensionRef, InformationSchemaProvider};
+use crate::kvbackend::manager::{SystemCatalog, CATALOG_CACHE_MAX_CAPACITY};
+use crate::kvbackend::KvBackendCatalogManager;
+use crate::process_manager::ProcessManagerRef;
+use crate::system_schema::pg_catalog::PGCatalogProvider;
+
+pub struct KvBackendCatalogManagerBuilder {
+    information_extension: InformationExtensionRef,
+    backend: KvBackendRef,
+    cache_registry: LayeredCacheRegistryRef,
+    procedure_manager: Option<ProcedureManagerRef>,
+    process_manager: Option<ProcessManagerRef>,
+    #[cfg(feature = "enterprise")]
+    extra_information_table_factories:
+        std::collections::HashMap<String, InformationSchemaTableFactoryRef>,
+}
+
+impl KvBackendCatalogManagerBuilder {
+    pub fn new(
+        information_extension: InformationExtensionRef,
+        backend: KvBackendRef,
+        cache_registry: LayeredCacheRegistryRef,
+    ) -> Self {
+        Self {
+            information_extension,
+            backend,
+            cache_registry,
+            procedure_manager: None,
+            process_manager: None,
+            #[cfg(feature = "enterprise")]
+            extra_information_table_factories: std::collections::HashMap::new(),
+        }
+    }
+
+    pub fn with_procedure_manager(mut self, procedure_manager: ProcedureManagerRef) -> Self {
+        self.procedure_manager = Some(procedure_manager);
+        self
+    }
+
+    pub fn with_process_manager(mut self, process_manager: ProcessManagerRef) -> Self {
+        self.process_manager = Some(process_manager);
+        self
+    }
+
+    /// Sets the extra `information_schema` table factories, replacing any set earlier.
+    #[cfg(feature = "enterprise")]
+    pub fn with_extra_information_table_factories(
+        mut self,
+        factories: std::collections::HashMap<String, InformationSchemaTableFactoryRef>,
+    ) -> Self {
+        self.extra_information_table_factories = factories;
+        self
+    }
+
+    pub fn build(self) -> Arc<KvBackendCatalogManager> {
+        let Self {
+            information_extension,
+            backend,
+            cache_registry,
+            procedure_manager,
+            process_manager,
+            #[cfg(feature = "enterprise")]
+            extra_information_table_factories,
+        } = self;
+        Arc::new_cyclic(|me| KvBackendCatalogManager {
+            information_extension,
+            partition_manager: Arc::new(PartitionRuleManager::new(
+                backend.clone(),
+                cache_registry
+                    .get()
+                    .expect("Failed to get table_route_cache"),
+            )),
+            table_metadata_manager: Arc::new(TableMetadataManager::new(backend.clone())),
+            system_catalog: SystemCatalog {
+                catalog_manager: me.clone(),
+                catalog_cache: Cache::new(CATALOG_CACHE_MAX_CAPACITY),
+                pg_catalog_cache: Cache::new(CATALOG_CACHE_MAX_CAPACITY),
+                information_schema_provider: {
+                    let provider = InformationSchemaProvider::new(
+                        DEFAULT_CATALOG_NAME.to_string(),
+                        me.clone(),
+                        Arc::new(FlowMetadataManager::new(backend.clone())),
+                        process_manager.clone(),
+                        backend.clone(),
+                    );
+                    #[cfg(feature = "enterprise")]
+                    let provider = provider
+                        .with_extra_table_factories(extra_information_table_factories.clone());
+                    Arc::new(provider)
+                },
+                pg_catalog_provider: Arc::new(PGCatalogProvider::new(
+                    DEFAULT_CATALOG_NAME.to_string(),
+                    me.clone(),
+                )),
+                backend,
+                process_manager,
+                #[cfg(feature = "enterprise")]
+                extra_information_table_factories,
+            },
+            cache_registry,
+            procedure_manager,
+        })
+    }
+}
--- a/src/catalog/src/kvbackend/manager.rs
+++ b/src/catalog/src/kvbackend/manager.rs
@@ -22,21 +22,24 @@ use common_catalog::consts::{
    PG_CATALOG_NAME,
 };
 use common_error::ext::BoxedError;
-use common_meta::cache::{LayeredCacheRegistryRef, ViewInfoCacheRef};
+use common_meta::cache::{
+    LayeredCacheRegistryRef, TableRoute, TableRouteCacheRef, ViewInfoCacheRef,
+};
 use common_meta::key::catalog_name::CatalogNameKey;
 use common_meta::key::flow::FlowMetadataManager;
 use common_meta::key::schema_name::SchemaNameKey;
-use common_meta::key::table_info::TableInfoValue;
+use common_meta::key::table_info::{TableInfoManager, TableInfoValue};
 use common_meta::key::table_name::TableNameKey;
-use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
+use common_meta::key::TableMetadataManagerRef;
 use common_meta::kv_backend::KvBackendRef;
 use common_procedure::ProcedureManagerRef;
 use futures_util::stream::BoxStream;
 use futures_util::{StreamExt, TryStreamExt};
 use moka::sync::Cache;
-use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
+use partition::manager::PartitionRuleManagerRef;
 use session::context::{Channel, QueryContext};
 use snafu::prelude::*;
+use store_api::metric_engine_consts::METRIC_ENGINE_NAME;
 use table::dist_table::DistTable;
 use table::metadata::TableId;
 use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
@@ -49,6 +52,8 @@ use crate::error::{
    CacheNotFoundSnafu, GetTableCacheSnafu, InvalidTableInfoInCatalogSnafu, ListCatalogsSnafu,
    ListSchemasSnafu, ListTablesSnafu, Result, TableMetadataManagerSnafu,
 };
+#[cfg(feature = "enterprise")]
+use crate::information_schema::InformationSchemaTableFactoryRef;
 use crate::information_schema::{InformationExtensionRef, InformationSchemaProvider};
 use crate::kvbackend::TableCacheRef;
 use crate::process_manager::ProcessManagerRef;
@@ -64,60 +69,22 @@ use crate::CatalogManager;
 #[derive(Clone)]
 pub struct KvBackendCatalogManager {
    /// Provides the extension methods for the `information_schema` tables
-    information_extension: InformationExtensionRef,
+    pub(super) information_extension: InformationExtensionRef,
    /// Manages partition rules.
-    partition_manager: PartitionRuleManagerRef,
+    pub(super) partition_manager: PartitionRuleManagerRef,
    /// Manages table metadata.
-    table_metadata_manager: TableMetadataManagerRef,
+    pub(super) table_metadata_manager: TableMetadataManagerRef,
    /// A sub-CatalogManager that handles system tables
-    system_catalog: SystemCatalog,
+    pub(super) system_catalog: SystemCatalog,
    /// Cache registry for all caches.
-    cache_registry: LayeredCacheRegistryRef,
+    pub(super) cache_registry: LayeredCacheRegistryRef,
    /// Only available in `Standalone` mode.
-    procedure_manager: Option<ProcedureManagerRef>,
+    pub(super) procedure_manager: Option<ProcedureManagerRef>,
 }

-const CATALOG_CACHE_MAX_CAPACITY: u64 = 128;
+pub(super) const CATALOG_CACHE_MAX_CAPACITY: u64 = 128;

 impl KvBackendCatalogManager {
-    pub fn new(
-        information_extension: InformationExtensionRef,
-        backend: KvBackendRef,
-        cache_registry: LayeredCacheRegistryRef,
-        procedure_manager: Option<ProcedureManagerRef>,
-        process_manager: Option<ProcessManagerRef>,
-    ) -> Arc<Self> {
-        Arc::new_cyclic(|me| Self {
-            information_extension,
-            partition_manager: Arc::new(PartitionRuleManager::new(
-                backend.clone(),
-                cache_registry
-                    .get()
-                    .expect("Failed to get table_route_cache"),
-            )),
-            table_metadata_manager: Arc::new(TableMetadataManager::new(backend.clone())),
-            system_catalog: SystemCatalog {
-                catalog_manager: me.clone(),
-                catalog_cache: Cache::new(CATALOG_CACHE_MAX_CAPACITY),
-                pg_catalog_cache: Cache::new(CATALOG_CACHE_MAX_CAPACITY),
-                information_schema_provider: Arc::new(InformationSchemaProvider::new(
-                    DEFAULT_CATALOG_NAME.to_string(),
-                    me.clone(),
-                    Arc::new(FlowMetadataManager::new(backend.clone())),
-                    process_manager.clone(),
-                )),
-                pg_catalog_provider: Arc::new(PGCatalogProvider::new(
-                    DEFAULT_CATALOG_NAME.to_string(),
-                    me.clone(),
-                )),
-                backend,
-                process_manager,
-            },
-            cache_registry,
-            procedure_manager,
-        })
-    }
-
    pub fn view_info_cache(&self) -> Result<ViewInfoCacheRef> {
        self.cache_registry.get().context(CacheNotFoundSnafu {
            name: "view_info_cache",
@@ -140,6 +107,61 @@ impl KvBackendCatalogManager {
    pub fn procedure_manager(&self) -> Option<ProcedureManagerRef> {
        self.procedure_manager.clone()
    }
+
+    /// Replaces a logical table's partition key indices with its physical table's, matched by name.
+    async fn override_logical_table_partition_key_indices(
+        table_route_cache: &TableRouteCacheRef,
+        table_info_manager: &TableInfoManager,
+        table: TableRef,
+    ) -> Result<TableRef> {
+        // If the table is not a metric table, return the table directly.
+        if table.table_info().meta.engine != METRIC_ENGINE_NAME {
+            return Ok(table);
+        }
+
+        if let Some(table_route_value) = table_route_cache
+            .get(table.table_info().table_id())
+            .await
+            .context(TableMetadataManagerSnafu)?
+            && let TableRoute::Logical(logical_route) = &*table_route_value
+            && let Some(physical_table_info_value) = table_info_manager
+                .get(logical_route.physical_table_id())
+                .await
+                .context(TableMetadataManagerSnafu)?
+        {
+            let mut new_table_info = (*table.table_info()).clone();
+
+            // Remap partition key indices from physical table to logical table
+            new_table_info.meta.partition_key_indices = physical_table_info_value
+                .table_info
+                .meta
+                .partition_key_indices
+                .iter()
+                .filter_map(|&physical_index| {
+                    // Get the column name from the physical table using the physical index
+                    physical_table_info_value
+                        .table_info
+                        .meta
+                        .schema
+                        .column_schemas
+                        .get(physical_index)
+                        .and_then(|physical_column| {
+                            // Find the corresponding index in the logical table schema
+                            new_table_info
+                                .meta
+                                .schema
+                                .column_index_by_name(physical_column.name.as_str())
+                        })
+                })
+                .collect();
+
+            let new_table = DistTable::table(Arc::new(new_table_info));
+
+            return Ok(new_table);
+        }
+
+        Ok(table)
+    }
 }

 #[async_trait::async_trait]
@@ -266,16 +288,28 @@ impl CatalogManager for KvBackendCatalogManager {
        let table_cache: TableCacheRef = self.cache_registry.get().context(CacheNotFoundSnafu {
            name: "table_cache",
        })?;
-        if let Some(table) = table_cache
+
+        let table = table_cache
            .get_by_ref(&TableName {
                catalog_name: catalog_name.to_string(),
                schema_name: schema_name.to_string(),
                table_name: table_name.to_string(),
            })
            .await
-            .context(GetTableCacheSnafu)?
-        {
-            return Ok(Some(table));
+            .context(GetTableCacheSnafu)?;
+
+        if let Some(table) = table {
+            let table_route_cache: TableRouteCacheRef =
+                self.cache_registry.get().context(CacheNotFoundSnafu {
+                    name: "table_route_cache",
+                })?;
+            return Self::override_logical_table_partition_key_indices(
+                &table_route_cache,
+                self.table_metadata_manager.table_info_manager(),
+                table,
+            )
+            .await
+            .map(Some);
        }

        if channel == Channel::Postgres {
@@ -288,7 +322,7 @@ impl CatalogManager for KvBackendCatalogManager {
            }
        }

-        return Ok(None);
+        Ok(None)
    }

    async fn tables_by_ids(
@@ -340,8 +374,20 @@ impl CatalogManager for KvBackendCatalogManager {
        let catalog = catalog.to_string();
        let schema = schema.to_string();
        let semaphore = Arc::new(Semaphore::new(CONCURRENCY));
+        let table_route_cache: Result<TableRouteCacheRef> =
+            self.cache_registry.get().context(CacheNotFoundSnafu {
+                name: "table_route_cache",
+            });

        common_runtime::spawn_global(async move {
+            let table_route_cache = match table_route_cache {
+                Ok(table_route_cache) => table_route_cache,
+                Err(e) => {
+                    let _ = tx.send(Err(e)).await;
+                    return;
+                }
+            };
+
            let table_id_stream = metadata_manager
                .table_name_manager()
                .tables(&catalog, &schema)
@@ -368,6 +414,7 @@ impl CatalogManager for KvBackendCatalogManager {
                let metadata_manager = metadata_manager.clone();
                let tx = tx.clone();
                let semaphore = semaphore.clone();
+                let table_route_cache = table_route_cache.clone();
                common_runtime::spawn_global(async move {
                    // we don't explicitly close the semaphore so just ignore the potential error.
                    let _ = semaphore.acquire().await;
@@ -385,6 +432,16 @@ impl CatalogManager for KvBackendCatalogManager {
                    };

                    for table in table_info_values.into_values().map(build_table) {
+                        let table = if let Ok(table) = table {
+                            Self::override_logical_table_partition_key_indices(
+                                &table_route_cache,
+                                metadata_manager.table_info_manager(),
+                                table,
+                            )
+                            .await
+                        } else {
+                            table
+                        };
                        if tx.send(table).await.is_err() {
                            return;
                        }
@@ -414,16 +471,19 @@ fn build_table(table_info_value: TableInfoValue) -> Result<TableRef> {
 /// - information_schema.{tables}
 /// - pg_catalog.{tables}
 #[derive(Clone)]
-struct SystemCatalog {
-    catalog_manager: Weak<KvBackendCatalogManager>,
-    catalog_cache: Cache<String, Arc<InformationSchemaProvider>>,
-    pg_catalog_cache: Cache<String, Arc<PGCatalogProvider>>,
+pub(super) struct SystemCatalog {
+    pub(super) catalog_manager: Weak<KvBackendCatalogManager>,
+    pub(super) catalog_cache: Cache<String, Arc<InformationSchemaProvider>>,
+    pub(super) pg_catalog_cache: Cache<String, Arc<PGCatalogProvider>>,

    // system_schema_provider for default catalog
-    information_schema_provider: Arc<InformationSchemaProvider>,
-    pg_catalog_provider: Arc<PGCatalogProvider>,
-    backend: KvBackendRef,
-    process_manager: Option<ProcessManagerRef>,
+    pub(super) information_schema_provider: Arc<InformationSchemaProvider>,
+    pub(super) pg_catalog_provider: Arc<PGCatalogProvider>,
+    pub(super) backend: KvBackendRef,
+    pub(super) process_manager: Option<ProcessManagerRef>,
+    #[cfg(feature = "enterprise")]
+    pub(super) extra_information_table_factories:
+        std::collections::HashMap<String, InformationSchemaTableFactoryRef>,
 }

 impl SystemCatalog {
@@ -487,12 +547,17 @@ impl SystemCatalog {
        if schema == INFORMATION_SCHEMA_NAME {
            let information_schema_provider =
                self.catalog_cache.get_with_by_ref(catalog, move || {
-                    Arc::new(InformationSchemaProvider::new(
+                    let provider = InformationSchemaProvider::new(
                        catalog.to_string(),
                        self.catalog_manager.clone(),
                        Arc::new(FlowMetadataManager::new(self.backend.clone())),
                        self.process_manager.clone(),
-                    ))
+                        self.backend.clone(),
+                    );
+                    #[cfg(feature = "enterprise")]
+                    let provider = provider
+                        .with_extra_table_factories(self.extra_information_table_factories.clone());
+                    Arc::new(provider)
                });
            information_schema_provider.table(table_name)
        } else if schema == PG_CATALOG_NAME && channel == Channel::Postgres {
--- a/src/catalog/src/lib.rs
+++ b/src/catalog/src/lib.rs
@@ -14,6 +14,7 @@

 #![feature(assert_matches)]
 #![feature(try_blocks)]
+#![feature(let_chains)]

 use std::any::Any;
 use std::fmt::{Debug, Formatter};
--- a/src/catalog/src/memory/manager.rs
+++ b/src/catalog/src/memory/manager.rs
@@ -352,11 +352,13 @@ impl MemoryCatalogManager {
    }

    fn create_catalog_entry(self: &Arc<Self>, catalog: String) -> SchemaEntries {
+        let backend = Arc::new(MemoryKvBackend::new());
        let information_schema_provider = InformationSchemaProvider::new(
            catalog,
            Arc::downgrade(self) as Weak<dyn CatalogManager>,
-            Arc::new(FlowMetadataManager::new(Arc::new(MemoryKvBackend::new()))),
+            Arc::new(FlowMetadataManager::new(backend.clone())),
            None, // we don't need ProcessManager on regions server.
+            backend,
        );
        let information_schema = information_schema_provider.tables().clone();

--- a/src/catalog/src/process_manager.rs
+++ b/src/catalog/src/process_manager.rs
@@ -15,13 +15,13 @@
 use std::collections::hash_map::Entry;
 use std::collections::HashMap;
 use std::fmt::{Debug, Formatter};
-use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::{Arc, RwLock};

 use api::v1::frontend::{KillProcessRequest, ListProcessRequest, ProcessInfo};
 use common_base::cancellation::CancellationHandle;
 use common_frontend::selector::{FrontendSelector, MetaClientSelector};
-use common_telemetry::{debug, info};
+use common_telemetry::{debug, info, warn};
 use common_time::util::current_time_millis;
 use meta_client::MetaClientRef;
 use snafu::{ensure, OptionExt, ResultExt};
@@ -29,6 +29,7 @@ use snafu::{ensure, OptionExt, ResultExt};
 use crate::error;
 use crate::metrics::{PROCESS_KILL_COUNT, PROCESS_LIST_COUNT};

+pub type ProcessId = u32;
 pub type ProcessManagerRef = Arc<ProcessManager>;

 /// Query process manager.
@@ -36,9 +37,9 @@ pub struct ProcessManager {
    /// Local frontend server address,
    server_addr: String,
    /// Next process id for local queries.
-    next_id: AtomicU64,
+    next_id: AtomicU32,
    /// Running process per catalog.
-    catalogs: RwLock<HashMap<String, HashMap<u64, CancellableProcess>>>,
+    catalogs: RwLock<HashMap<String, HashMap<ProcessId, CancellableProcess>>>,
    /// Frontend selector to locate frontend nodes.
    frontend_selector: Option<MetaClientSelector>,
 }
@@ -65,9 +66,9 @@ impl ProcessManager {
        schemas: Vec<String>,
        query: String,
        client: String,
-        id: Option<u64>,
+        query_id: Option<ProcessId>,
    ) -> Ticket {
-        let id = id.unwrap_or_else(|| self.next_id.fetch_add(1, Ordering::Relaxed));
+        let id = query_id.unwrap_or_else(|| self.next_id.fetch_add(1, Ordering::Relaxed));
        let process = ProcessInfo {
            id,
            catalog: catalog.clone(),
@@ -96,12 +97,12 @@ impl ProcessManager {
    }

    /// Generates the next process id.
-    pub fn next_id(&self) -> u64 {
+    pub fn next_id(&self) -> ProcessId {
        self.next_id.fetch_add(1, Ordering::Relaxed)
    }

    /// De-register a query from process list.
-    pub fn deregister_query(&self, catalog: String, id: u64) {
+    pub fn deregister_query(&self, catalog: String, id: ProcessId) {
        if let Entry::Occupied(mut o) = self.catalogs.write().unwrap().entry(catalog) {
            let process = o.get_mut().remove(&id);
            debug!("Deregister process: {:?}", process);
@@ -140,14 +141,20 @@ impl ProcessManager {
                .await
                .context(error::InvokeFrontendSnafu)?;
            for mut f in frontends {
-                processes.extend(
-                    f.list_process(ListProcessRequest {
+                let result = f
+                    .list_process(ListProcessRequest {
                        catalog: catalog.unwrap_or_default().to_string(),
                    })
                    .await
-                    .context(error::InvokeFrontendSnafu)?
-                    .processes,
-                );
+                    .context(error::InvokeFrontendSnafu);
+                match result {
+                    Ok(resp) => {
+                        processes.extend(resp.processes);
+                    }
+                    Err(e) => {
+                        warn!(e; "Skipping failing node: {:?}", f)
+                    }
+                }
            }
        }
        processes.extend(self.local_processes(catalog)?);
@@ -159,26 +166,10 @@ impl ProcessManager {
        &self,
        server_addr: String,
        catalog: String,
-        id: u64,
+        id: ProcessId,
    ) -> error::Result<bool> {
        if server_addr == self.server_addr {
-            if let Some(catalogs) = self.catalogs.write().unwrap().get_mut(&catalog) {
-                if let Some(process) = catalogs.remove(&id) {
-                    process.handle.cancel();
-                    info!(
-                        "Killed process, catalog: {}, id: {:?}",
-                        process.process.catalog, process.process.id
-                    );
-                    PROCESS_KILL_COUNT.with_label_values(&[&catalog]).inc();
-                    Ok(true)
-                } else {
-                    debug!("Failed to kill process, id not found: {}", id);
-                    Ok(false)
-                }
-            } else {
-                debug!("Failed to kill process, catalog not found: {}", catalog);
-                Ok(false)
-            }
+            self.kill_local_process(catalog, id).await
        } else {
            let mut nodes = self
                .frontend_selector
@@ -204,12 +195,33 @@ impl ProcessManager {
            Ok(true)
        }
    }
+
+    /// Cancels and removes the local query `id` in `catalog`; `Ok(false)` if it is not registered.
+    pub async fn kill_local_process(&self, catalog: String, id: ProcessId) -> error::Result<bool> {
+        if let Some(catalogs) = self.catalogs.write().unwrap().get_mut(&catalog) {
+            if let Some(process) = catalogs.remove(&id) {
+                process.handle.cancel();
+                info!(
+                    "Killed process, catalog: {}, id: {:?}",
+                    process.process.catalog, process.process.id
+                );
+                PROCESS_KILL_COUNT.with_label_values(&[&catalog]).inc();
+                Ok(true)
+            } else {
+                debug!("Failed to kill process, id not found: {}", id);
+                Ok(false)
+            }
+        } else {
+            debug!("Failed to kill process, catalog not found: {}", catalog);
+            Ok(false)
+        }
+    }
 }

 pub struct Ticket {
    pub(crate) catalog: String,
    pub(crate) manager: ProcessManagerRef,
-    pub(crate) id: u64,
+    pub(crate) id: ProcessId,
    pub cancellation_handle: Arc<CancellationHandle>,
 }

@@ -323,7 +335,7 @@ mod tests {
        assert_eq!(running_processes.len(), 2);

        // Verify both processes are present
-        let ids: Vec<u64> = running_processes.iter().map(|p| p.id).collect();
+        let ids: Vec<u32> = running_processes.iter().map(|p| p.id).collect();
        assert!(ids.contains(&ticket1.id));
        assert!(ids.contains(&ticket2.id));
    }
--- a/src/catalog/src/system_schema.rs
+++ b/src/catalog/src/system_schema.rs
@@ -15,7 +15,7 @@
 pub mod information_schema;
 mod memory_table;
 pub mod pg_catalog;
-mod predicate;
+pub mod predicate;
 mod utils;

 use std::collections::HashMap;
@@ -96,7 +96,7 @@ trait SystemSchemaProviderInner {
    }
 }

-pub(crate) trait SystemTable {
+pub trait SystemTable {
    fn table_id(&self) -> TableId;

    fn table_name(&self) -> &'static str;
@@ -110,7 +110,7 @@ pub(crate) trait SystemTable {
    }
 }

-pub(crate) type SystemTableRef = Arc<dyn SystemTable + Send + Sync>;
+pub type SystemTableRef = Arc<dyn SystemTable + Send + Sync>;

 struct SystemTableDataSource {
    table: SystemTableRef,
--- a/src/catalog/src/system_schema/information_schema.rs
+++ b/src/catalog/src/system_schema/information_schema.rs
@@ -19,7 +19,7 @@ mod information_memory_table;
 pub mod key_column_usage;
 mod partitions;
 mod procedure_info;
-mod process_list;
+pub mod process_list;
 pub mod region_peers;
 mod region_statistics;
 mod runtime_metrics;
@@ -38,6 +38,7 @@ use common_meta::cluster::NodeInfo;
 use common_meta::datanode::RegionStat;
 use common_meta::key::flow::flow_state::FlowStat;
 use common_meta::key::flow::FlowMetadataManager;
+use common_meta::kv_backend::KvBackendRef;
 use common_procedure::ProcedureInfo;
 use common_recordbatch::SendableRecordBatchStream;
 use datatypes::schema::SchemaRef;
@@ -112,6 +113,25 @@ macro_rules! setup_memory_table {
    };
 }

+#[cfg(feature = "enterprise")]
+pub struct MakeInformationTableRequest {
+    pub catalog_name: String,
+    pub catalog_manager: Weak<dyn CatalogManager>,
+    pub kv_backend: KvBackendRef,
+}
+
+/// A factory trait for making information schema tables.
+///
+/// This trait allows for extensibility of the information schema by providing
+/// a way to dynamically create custom information schema tables.
+#[cfg(feature = "enterprise")]
+pub trait InformationSchemaTableFactory {
+    fn make_information_table(&self, req: MakeInformationTableRequest) -> SystemTableRef;
+}
+
+#[cfg(feature = "enterprise")]
+pub type InformationSchemaTableFactoryRef = Arc<dyn InformationSchemaTableFactory + Send + Sync>;
+
 /// The `information_schema` tables info provider.
 pub struct InformationSchemaProvider {
    catalog_name: String,
@@ -119,6 +139,10 @@ pub struct InformationSchemaProvider {
    process_manager: Option<ProcessManagerRef>,
    flow_metadata_manager: Arc<FlowMetadataManager>,
    tables: HashMap<String, TableRef>,
+    #[allow(dead_code)]
+    kv_backend: KvBackendRef,
+    #[cfg(feature = "enterprise")]
+    extra_table_factories: HashMap<String, InformationSchemaTableFactoryRef>,
 }

 impl SystemSchemaProvider for InformationSchemaProvider {
@@ -128,6 +152,7 @@ impl SystemSchemaProvider for InformationSchemaProvider {
        &self.tables
    }
 }
+
 impl SystemSchemaProviderInner for InformationSchemaProvider {
    fn catalog_name(&self) -> &str {
        &self.catalog_name
@@ -137,6 +162,16 @@ impl SystemSchemaProviderInner for InformationSchemaProvider {
    }

    fn system_table(&self, name: &str) -> Option<SystemTableRef> {
+        #[cfg(feature = "enterprise")]
+        if let Some(factory) = self.extra_table_factories.get(name) {
+            let req = MakeInformationTableRequest {
+                catalog_name: self.catalog_name.clone(),
+                catalog_manager: self.catalog_manager.clone(),
+                kv_backend: self.kv_backend.clone(),
+            };
+            return Some(factory.make_information_table(req));
+        }
+
        match name.to_ascii_lowercase().as_str() {
            TABLES => Some(Arc::new(InformationSchemaTables::new(
                self.catalog_name.clone(),
@@ -226,6 +261,7 @@ impl InformationSchemaProvider {
        catalog_manager: Weak<dyn CatalogManager>,
        flow_metadata_manager: Arc<FlowMetadataManager>,
        process_manager: Option<ProcessManagerRef>,
+        kv_backend: KvBackendRef,
    ) -> Self {
        let mut provider = Self {
            catalog_name,
@@ -233,6 +269,9 @@ impl InformationSchemaProvider {
            flow_metadata_manager,
            process_manager,
            tables: HashMap::new(),
+            kv_backend,
+            #[cfg(feature = "enterprise")]
+            extra_table_factories: HashMap::new(),
        };

        provider.build_tables();
@@ -240,6 +279,16 @@ impl InformationSchemaProvider {
        provider
    }

+    #[cfg(feature = "enterprise")]
+    pub(crate) fn with_extra_table_factories(
+        mut self,
+        factories: HashMap<String, InformationSchemaTableFactoryRef>,
+    ) -> Self {
+        self.extra_table_factories = factories;
+        self.build_tables();
+        self
+    }
+
    fn build_tables(&mut self) {
        let mut tables = HashMap::new();

@@ -290,16 +339,19 @@ impl InformationSchemaProvider {
        if let Some(process_list) = self.build_table(PROCESS_LIST) {
            tables.insert(PROCESS_LIST.to_string(), process_list);
        }
+        #[cfg(feature = "enterprise")]
+        for name in self.extra_table_factories.keys() {
+            tables.insert(name.to_string(), self.build_table(name).expect(name));
+        }
        // Add memory tables
        for name in MEMORY_TABLES.iter() {
            tables.insert((*name).to_string(), self.build_table(name).expect(name));
        }
-
        self.tables = tables;
    }
 }

-trait InformationTable {
+pub trait InformationTable {
    fn table_id(&self) -> TableId;

    fn table_name(&self) -> &'static str;
--- a/src/catalog/src/system_schema/information_schema/information_memory_table.rs
+++ b/src/catalog/src/system_schema/information_schema/information_memory_table.rs
@@ -15,7 +15,8 @@
 use std::sync::Arc;

 use common_catalog::consts::{METRIC_ENGINE, MITO_ENGINE};
-use datatypes::schema::{Schema, SchemaRef};
+use datatypes::data_type::ConcreteDataType;
+use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
 use datatypes::vectors::{Int64Vector, StringVector, VectorRef};

 use crate::system_schema::information_schema::table_names::*;
@@ -367,28 +368,18 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>

        TRIGGERS => (
            vec![
-                string_column("TRIGGER_CATALOG"),
-                string_column("TRIGGER_SCHEMA"),
                string_column("TRIGGER_NAME"),
-                string_column("EVENT_MANIPULATION"),
-                string_column("EVENT_OBJECT_CATALOG"),
-                string_column("EVENT_OBJECT_SCHEMA"),
-                string_column("EVENT_OBJECT_TABLE"),
-                bigint_column("ACTION_ORDER"),
-                string_column("ACTION_CONDITION"),
-                string_column("ACTION_STATEMENT"),
-                string_column("ACTION_ORIENTATION"),
-                string_column("ACTION_TIMING"),
-                string_column("ACTION_REFERENCE_OLD_TABLE"),
-                string_column("ACTION_REFERENCE_NEW_TABLE"),
-                string_column("ACTION_REFERENCE_OLD_ROW"),
-                string_column("ACTION_REFERENCE_NEW_ROW"),
-                timestamp_micro_column("CREATED"),
-                string_column("SQL_MODE"),
-                string_column("DEFINER"),
-                string_column("CHARACTER_SET_CLIENT"),
-                string_column("COLLATION_CONNECTION"),
-                string_column("DATABASE_COLLATION"),
+                ColumnSchema::new(
+                    "trigger_id",
+                    ConcreteDataType::uint64_datatype(),
+                    false,
+                ),
+                string_column("TRIGGER_DEFINITION"),
+                ColumnSchema::new(
+                    "flownode_id",
+                    ConcreteDataType::uint64_datatype(),
+                    true,
+                ),
            ],
            vec![],
        ),
--- a/src/catalog/src/system_schema/information_schema/partitions.rs
+++ b/src/catalog/src/system_schema/information_schema/partitions.rs
@@ -329,13 +329,8 @@ impl InformationSchemaPartitionsBuilder {
            self.partition_names.push(Some(&partition_name));
            self.partition_ordinal_positions
                .push(Some((index + 1) as i64));
-            let expressions = if partition.partition.partition_columns().is_empty() {
-                None
-            } else {
-                Some(partition.partition.to_string())
-            };
-
-            self.partition_expressions.push(expressions.as_deref());
+            let expression = partition.partition_expr.as_ref().map(|e| e.to_string());
+            self.partition_expressions.push(expression.as_deref());
            self.create_times.push(Some(TimestampMicrosecond::from(
                table_info.meta.created_on.timestamp_millis(),
            )));
--- a/src/catalog/src/system_schema/information_schema/process_list.rs
+++ b/src/catalog/src/system_schema/information_schema/process_list.rs
@@ -39,14 +39,14 @@ use crate::process_manager::ProcessManagerRef;
 use crate::system_schema::information_schema::InformationTable;

 /// Column names of `information_schema.process_list`
-const ID: &str = "id";
-const CATALOG: &str = "catalog";
-const SCHEMAS: &str = "schemas";
-const QUERY: &str = "query";
-const CLIENT: &str = "client";
-const FRONTEND: &str = "frontend";
-const START_TIMESTAMP: &str = "start_timestamp";
-const ELAPSED_TIME: &str = "elapsed_time";
+pub const ID: &str = "id";
+pub const CATALOG: &str = "catalog";
+pub const SCHEMAS: &str = "schemas";
+pub const QUERY: &str = "query";
+pub const CLIENT: &str = "client";
+pub const FRONTEND: &str = "frontend";
+pub const START_TIMESTAMP: &str = "start_timestamp";
+pub const ELAPSED_TIME: &str = "elapsed_time";

 /// `information_schema.process_list` table implementation that tracks running
 /// queries in current cluster.
--- a/src/catalog/src/system_schema/information_schema/region_statistics.rs
+++ b/src/catalog/src/system_schema/information_schema/region_statistics.rs
@@ -44,6 +44,7 @@ const DISK_SIZE: &str = "disk_size";
 const MEMTABLE_SIZE: &str = "memtable_size";
 const MANIFEST_SIZE: &str = "manifest_size";
 const SST_SIZE: &str = "sst_size";
+const SST_NUM: &str = "sst_num";
 const INDEX_SIZE: &str = "index_size";
 const ENGINE: &str = "engine";
 const REGION_ROLE: &str = "region_role";
@@ -87,6 +88,7 @@ impl InformationSchemaRegionStatistics {
            ColumnSchema::new(MEMTABLE_SIZE, ConcreteDataType::uint64_datatype(), true),
            ColumnSchema::new(MANIFEST_SIZE, ConcreteDataType::uint64_datatype(), true),
            ColumnSchema::new(SST_SIZE, ConcreteDataType::uint64_datatype(), true),
+            ColumnSchema::new(SST_NUM, ConcreteDataType::uint64_datatype(), true),
            ColumnSchema::new(INDEX_SIZE, ConcreteDataType::uint64_datatype(), true),
            ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
            ColumnSchema::new(REGION_ROLE, ConcreteDataType::string_datatype(), true),
@@ -149,6 +151,7 @@ struct InformationSchemaRegionStatisticsBuilder {
    memtable_sizes: UInt64VectorBuilder,
    manifest_sizes: UInt64VectorBuilder,
    sst_sizes: UInt64VectorBuilder,
+    sst_nums: UInt64VectorBuilder,
    index_sizes: UInt64VectorBuilder,
    engines: StringVectorBuilder,
    region_roles: StringVectorBuilder,
@@ -167,6 +170,7 @@ impl InformationSchemaRegionStatisticsBuilder {
            memtable_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
            manifest_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
            sst_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
+            sst_nums: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
            index_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
            engines: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            region_roles: StringVectorBuilder::with_capacity(INIT_CAPACITY),
@@ -197,6 +201,7 @@ impl InformationSchemaRegionStatisticsBuilder {
            (MEMTABLE_SIZE, &Value::from(region_stat.memtable_size)),
            (MANIFEST_SIZE, &Value::from(region_stat.manifest_size)),
            (SST_SIZE, &Value::from(region_stat.sst_size)),
+            (SST_NUM, &Value::from(region_stat.sst_num)),
            (INDEX_SIZE, &Value::from(region_stat.index_size)),
            (ENGINE, &Value::from(region_stat.engine.as_str())),
            (REGION_ROLE, &Value::from(region_stat.role.to_string())),
@@ -215,6 +220,7 @@ impl InformationSchemaRegionStatisticsBuilder {
        self.memtable_sizes.push(Some(region_stat.memtable_size));
        self.manifest_sizes.push(Some(region_stat.manifest_size));
        self.sst_sizes.push(Some(region_stat.sst_size));
+        self.sst_nums.push(Some(region_stat.sst_num));
        self.index_sizes.push(Some(region_stat.index_size));
        self.engines.push(Some(&region_stat.engine));
        self.region_roles.push(Some(&region_stat.role.to_string()));
@@ -230,6 +236,7 @@ impl InformationSchemaRegionStatisticsBuilder {
            Arc::new(self.memtable_sizes.finish()),
            Arc::new(self.manifest_sizes.finish()),
            Arc::new(self.sst_sizes.finish()),
+            Arc::new(self.sst_nums.finish()),
            Arc::new(self.index_sizes.finish()),
            Arc::new(self.engines.finish()),
            Arc::new(self.region_roles.finish()),
--- a/src/catalog/src/system_schema/pg_catalog/pg_class.rs
+++ b/src/catalog/src/system_schema/pg_catalog/pg_class.rs
@@ -169,7 +169,7 @@ impl DfPartitionStream for PGClass {
 }

 /// Builds the `pg_catalog.pg_class` table row by row
-/// TODO(J0HN50N133): `relowner` is always the [`DUMMY_OWNER_ID`] cuz we don't have user.
+/// TODO(J0HN50N133): `relowner` is always the [`DUMMY_OWNER_ID`] because we don't have users.
 /// Once we have user system, make it the actual owner of the table.
 struct PGClassBuilder {
    schema: SchemaRef,
--- a/src/catalog/src/table_source.rs
+++ b/src/catalog/src/table_source.rs
@@ -207,6 +207,7 @@ mod tests {
    use session::context::QueryContext;

    use super::*;
+    use crate::kvbackend::KvBackendCatalogManagerBuilder;
    use crate::memory::MemoryCatalogManager;

    #[test]
@@ -323,13 +324,13 @@ mod tests {
            .build(),
        );

-        let catalog_manager = KvBackendCatalogManager::new(
+        let catalog_manager = KvBackendCatalogManagerBuilder::new(
            Arc::new(NoopInformationExtension),
            backend.clone(),
            layered_cache_registry,
-            None,
-            None,
-        );
+        )
+        .build();
+
        let table_metadata_manager = TableMetadataManager::new(backend);
        let mut view_info = common_meta::key::test_utils::new_test_table_info(1024, vec![]);
        view_info.table_type = TableType::View;
--- a/src/cli/Cargo.toml
+++ b/src/cli/Cargo.toml
@@ -16,6 +16,7 @@ mysql_kvbackend = ["common-meta/mysql_kvbackend", "meta-srv/mysql_kvbackend"]
 workspace = true

 [dependencies]
+async-stream.workspace = true
 async-trait.workspace = true
 auth.workspace = true
 base64.workspace = true
@@ -42,7 +43,6 @@ common-time.workspace = true
 common-version.workspace = true
 common-wal.workspace = true
 datatypes.workspace = true
-either = "1.8"
 etcd-client.workspace = true
 futures.workspace = true
 humantime.workspace = true
@@ -50,6 +50,7 @@ meta-client.workspace = true
 meta-srv.workspace = true
 nu-ansi-term = "0.46"
 object-store.workspace = true
+operator.workspace = true
 query.workspace = true
 rand.workspace = true
 reqwest.workspace = true
@@ -65,6 +66,7 @@ tokio.workspace = true
 tracing-appender.workspace = true

 [dev-dependencies]
+common-meta = { workspace = true, features = ["testing"] }
 common-version.workspace = true
 serde.workspace = true
 tempfile.workspace = true
--- a/src/cli/src/bench.rs
+++ b/src/cli/src/bench.rs
@@ -160,6 +160,7 @@ fn create_table_info(table_id: TableId, table_name: TableName) -> RawTableInfo {
        options: Default::default(),
        region_numbers: (1..=100).collect(),
        partition_key_indices: vec![],
+        column_ids: vec![],
    };

    RawTableInfo {
@@ -187,6 +188,7 @@ fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
                name: String::new(),
                partition: None,
                attrs: BTreeMap::new(),
+                partition_expr: Default::default(),
            },
            leader_peer: Some(Peer {
                id: rng.random_range(0..10),
--- a/src/cli/src/error.rs
+++ b/src/cli/src/error.rs
@@ -17,8 +17,10 @@ use std::any::Any;
 use common_error::ext::{BoxedError, ErrorExt};
 use common_error::status_code::StatusCode;
 use common_macro::stack_trace_debug;
+use common_meta::peer::Peer;
 use object_store::Error as ObjectStoreError;
 use snafu::{Location, Snafu};
+use store_api::storage::TableId;

 #[derive(Snafu)]
 #[snafu(visibility(pub))]
@@ -73,6 +75,20 @@ pub enum Error {
        source: common_meta::error::Error,
    },

+    #[snafu(display("Failed to get table metadata"))]
+    TableMetadata {
+        #[snafu(implicit)]
+        location: Location,
+        source: common_meta::error::Error,
+    },
+
+    #[snafu(display("Unexpected error: {}", msg))]
+    Unexpected {
+        msg: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
    #[snafu(display("Missing config, msg: {}", msg))]
    MissingConfig {
        msg: String,
@@ -222,6 +238,13 @@ pub enum Error {
        location: Location,
    },

+    #[snafu(display("Table not found: {table_id}"))]
+    TableNotFound {
+        table_id: TableId,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
    #[snafu(display("OpenDAL operator failed"))]
    OpenDal {
        #[snafu(implicit)]
@@ -267,6 +290,29 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },
+
+    #[snafu(display("Failed to init backend"))]
+    InitBackend {
+        #[snafu(implicit)]
+        location: Location,
+        #[snafu(source)]
+        error: ObjectStoreError,
+    },
+
+    #[snafu(display("Failed to convert column schemas to defs"))]
+    CovertColumnSchemasToDefs {
+        #[snafu(implicit)]
+        location: Location,
+        source: operator::error::Error,
+    },
+
+    #[snafu(display("Failed to send request to datanode: {}", peer))]
+    SendRequestToDatanode {
+        peer: Peer,
+        #[snafu(implicit)]
+        location: Location,
+        source: common_meta::error::Error,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -274,9 +320,9 @@ pub type Result<T> = std::result::Result<T, Error>;
 impl ErrorExt for Error {
    fn status_code(&self) -> StatusCode {
        match self {
-            Error::InitMetadata { source, .. } | Error::InitDdlManager { source, .. } => {
-                source.status_code()
-            }
+            Error::InitMetadata { source, .. }
+            | Error::InitDdlManager { source, .. }
+            | Error::TableMetadata { source, .. } => source.status_code(),

            Error::MissingConfig { .. }
            | Error::LoadLayeredConfig { .. }
@@ -290,6 +336,9 @@ impl ErrorExt for Error {
            | Error::InvalidArguments { .. }
            | Error::ParseProxyOpts { .. } => StatusCode::InvalidArguments,

+            Error::CovertColumnSchemasToDefs { source, .. } => source.status_code(),
+            Error::SendRequestToDatanode { source, .. } => source.status_code(),
+
            Error::StartProcedureManager { source, .. }
            | Error::StopProcedureManager { source, .. } => source.status_code(),
            Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
@@ -297,6 +346,7 @@ impl ErrorExt for Error {
            Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => {
                source.status_code()
            }
+            Error::Unexpected { .. } => StatusCode::Unexpected,

            Error::SerdeJson { .. }
            | Error::FileIo { .. }
@@ -305,7 +355,7 @@ impl ErrorExt for Error {
            | Error::BuildClient { .. } => StatusCode::Unexpected,

            Error::Other { source, .. } => source.status_code(),
-            Error::OpenDal { .. } => StatusCode::Internal,
+            Error::OpenDal { .. } | Error::InitBackend { .. } => StatusCode::Internal,
            Error::S3ConfigNotSet { .. }
            | Error::OutputDirNotSet { .. }
            | Error::EmptyStoreAddrs { .. } => StatusCode::InvalidArguments,
@@ -314,6 +364,7 @@ impl ErrorExt for Error {

            Error::CacheRequired { .. } | Error::BuildCacheRegistry { .. } => StatusCode::Internal,
            Error::MetaClientInit { source, .. } => source.status_code(),
+            Error::TableNotFound { .. } => StatusCode::TableNotFound,
            Error::SchemaNotFound { .. } => StatusCode::DatabaseNotFound,
        }
    }
--- a/src/cli/src/lib.rs
+++ b/src/cli/src/lib.rs
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#![allow(clippy::print_stdout)]
 mod bench;
 mod data;
 mod database;
--- a/src/cli/src/metadata.rs
+++ b/src/cli/src/metadata.rs
@@ -14,29 +14,39 @@

 mod common;
 mod control;
+mod repair;
 mod snapshot;
+mod utils;

 use clap::Subcommand;
 use common_error::ext::BoxedError;

-use crate::metadata::control::ControlCommand;
+use crate::metadata::control::{DelCommand, GetCommand};
+use crate::metadata::repair::RepairLogicalTablesCommand;
 use crate::metadata::snapshot::SnapshotCommand;
 use crate::Tool;

-/// Command for managing metadata operations, including saving metadata snapshots and restoring metadata from snapshots.
+/// Command for managing metadata operations,
+/// including saving and restoring metadata snapshots,
+/// controlling metadata operations, and diagnosing and repairing metadata.
 #[derive(Subcommand)]
 pub enum MetadataCommand {
    #[clap(subcommand)]
    Snapshot(SnapshotCommand),
    #[clap(subcommand)]
-    Control(ControlCommand),
+    Get(GetCommand),
+    #[clap(subcommand)]
+    Del(DelCommand),
+    RepairLogicalTables(RepairLogicalTablesCommand),
 }

 impl MetadataCommand {
    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
        match self {
            MetadataCommand::Snapshot(cmd) => cmd.build().await,
-            MetadataCommand::Control(cmd) => cmd.build().await,
+            MetadataCommand::RepairLogicalTables(cmd) => cmd.build().await,
+            MetadataCommand::Get(cmd) => cmd.build().await,
+            MetadataCommand::Del(cmd) => cmd.build().await,
        }
    }
 }
--- a/src/cli/src/metadata/control.rs
+++ b/src/cli/src/metadata/control.rs
@@ -12,27 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+mod del;
 mod get;
+#[cfg(test)]
+mod test_utils;
 mod utils;

-use clap::Subcommand;
-use common_error::ext::BoxedError;
-use get::GetCommand;
-
-use crate::Tool;
-
-/// Subcommand for metadata control.
-#[derive(Subcommand)]
-pub enum ControlCommand {
-    /// Get the metadata from the metasrv.
-    #[clap(subcommand)]
-    Get(GetCommand),
-}
-
-impl ControlCommand {
-    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
-        match self {
-            ControlCommand::Get(cmd) => cmd.build().await,
-        }
-    }
-}
+pub(crate) use del::DelCommand;
+pub(crate) use get::GetCommand;
--- a/src/cli/src/metadata/control/del.rs
+++ b/src/cli/src/metadata/control/del.rs
@@ -0,0 +1,42 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod key;
+mod table;
+
+use clap::Subcommand;
+use common_error::ext::BoxedError;
+
+use crate::metadata::control::del::key::DelKeyCommand;
+use crate::metadata::control::del::table::DelTableCommand;
+use crate::Tool;
+
+/// The prefix of the tombstone keys.
+pub(crate) const CLI_TOMBSTONE_PREFIX: &str = "__cli_tombstone/";
+
+/// Subcommand for deleting metadata from the metadata store.
+#[derive(Subcommand)]
+pub enum DelCommand {
+    Key(DelKeyCommand),
+    Table(DelTableCommand),
+}
+
+impl DelCommand {
+    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
+        match self {
+            DelCommand::Key(cmd) => cmd.build().await,
+            DelCommand::Table(cmd) => cmd.build().await,
+        }
+    }
+}
--- a/src/cli/src/metadata/control/del/key.rs
+++ b/src/cli/src/metadata/control/del/key.rs
@@ -0,0 +1,132 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use async_trait::async_trait;
+use clap::Parser;
+use common_error::ext::BoxedError;
+use common_meta::key::tombstone::TombstoneManager;
+use common_meta::kv_backend::KvBackendRef;
+use common_meta::rpc::store::RangeRequest;
+
+use crate::metadata::common::StoreConfig;
+use crate::metadata::control::del::CLI_TOMBSTONE_PREFIX;
+use crate::Tool;
+
+/// Delete key-value pairs logically from the metadata store.
+#[derive(Debug, Default, Parser)]
+pub struct DelKeyCommand {
+    /// The key to delete from the metadata store.
+    key: String,
+
+    /// Delete key-value pairs with the given prefix.
+    #[clap(long)]
+    prefix: bool,
+
+    #[clap(flatten)]
+    store: StoreConfig,
+}
+
+impl DelKeyCommand {
+    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
+        let kv_backend = self.store.build().await?;
+        Ok(Box::new(DelKeyTool {
+            key: self.key.to_string(),
+            prefix: self.prefix,
+            key_deleter: KeyDeleter::new(kv_backend),
+        }))
+    }
+}
+
+struct KeyDeleter {
+    kv_backend: KvBackendRef,
+    tombstone_manager: TombstoneManager,
+}
+
+impl KeyDeleter {
+    fn new(kv_backend: KvBackendRef) -> Self {
+        Self {
+            kv_backend: kv_backend.clone(),
+            tombstone_manager: TombstoneManager::new_with_prefix(kv_backend, CLI_TOMBSTONE_PREFIX),
+        }
+    }
+
+    async fn delete(&self, key: &str, prefix: bool) -> Result<usize, BoxedError> {
+        let mut req = RangeRequest::default().with_keys_only();
+        if prefix {
+            req = req.with_prefix(key.as_bytes());
+        } else {
+            req = req.with_key(key.as_bytes());
+        }
+        let resp = self.kv_backend.range(req).await.map_err(BoxedError::new)?;
+        let keys = resp.kvs.iter().map(|kv| kv.key.clone()).collect::<Vec<_>>();
+        self.tombstone_manager
+            .create(keys)
+            .await
+            .map_err(BoxedError::new)
+    }
+}
+
+struct DelKeyTool {
+    key: String,
+    prefix: bool,
+    key_deleter: KeyDeleter,
+}
+
+#[async_trait]
+impl Tool for DelKeyTool {
+    async fn do_work(&self) -> Result<(), BoxedError> {
+        let deleted = self.key_deleter.delete(&self.key, self.prefix).await?;
+        // Print the number of deleted keys.
+        println!("{}", deleted);
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use common_meta::kv_backend::chroot::ChrootKvBackend;
+    use common_meta::kv_backend::memory::MemoryKvBackend;
+    use common_meta::kv_backend::{KvBackend, KvBackendRef};
+    use common_meta::rpc::store::RangeRequest;
+
+    use crate::metadata::control::del::key::KeyDeleter;
+    use crate::metadata::control::del::CLI_TOMBSTONE_PREFIX;
+    use crate::metadata::control::test_utils::put_key;
+
+    #[tokio::test]
+    async fn test_delete_keys() {
+        let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
+        let key_deleter = KeyDeleter::new(kv_backend.clone());
+        put_key(&kv_backend, "foo", "bar").await;
+        put_key(&kv_backend, "foo/bar", "baz").await;
+        put_key(&kv_backend, "foo/baz", "qux").await;
+        let deleted = key_deleter.delete("foo", true).await.unwrap();
+        assert_eq!(deleted, 3);
+        let deleted = key_deleter.delete("foo/bar", false).await.unwrap();
+        assert_eq!(deleted, 0);
+
+        let chroot = ChrootKvBackend::new(CLI_TOMBSTONE_PREFIX.as_bytes().to_vec(), kv_backend);
+        let req = RangeRequest::default().with_prefix(b"foo");
+        let resp = chroot.range(req).await.unwrap();
+        assert_eq!(resp.kvs.len(), 3);
+        assert_eq!(resp.kvs[0].key, b"foo");
+        assert_eq!(resp.kvs[0].value, b"bar");
+        assert_eq!(resp.kvs[1].key, b"foo/bar");
+        assert_eq!(resp.kvs[1].value, b"baz");
+        assert_eq!(resp.kvs[2].key, b"foo/baz");
+        assert_eq!(resp.kvs[2].value, b"qux");
+    }
+}
--- a/src/cli/src/metadata/control/del/table.rs
+++ b/src/cli/src/metadata/control/del/table.rs
@@ -0,0 +1,235 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use async_trait::async_trait;
+use clap::Parser;
+use client::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+use common_catalog::format_full_table_name;
+use common_error::ext::BoxedError;
+use common_meta::ddl::utils::get_region_wal_options;
+use common_meta::key::table_name::TableNameManager;
+use common_meta::key::TableMetadataManager;
+use common_meta::kv_backend::KvBackendRef;
+use store_api::storage::TableId;
+
+use crate::error::{InvalidArgumentsSnafu, TableNotFoundSnafu};
+use crate::metadata::common::StoreConfig;
+use crate::metadata::control::del::CLI_TOMBSTONE_PREFIX;
+use crate::metadata::control::utils::get_table_id_by_name;
+use crate::Tool;
+
+/// Delete table metadata logically from the metadata store.
+#[derive(Debug, Default, Parser)]
+pub struct DelTableCommand {
+    /// The table id to delete from the metadata store.
+    #[clap(long)]
+    table_id: Option<u32>,
+
+    /// The table name to delete from the metadata store.
+    #[clap(long)]
+    table_name: Option<String>,
+
+    /// The schema name of the table.
+    #[clap(long, default_value = DEFAULT_SCHEMA_NAME)]
+    schema_name: String,
+
+    /// The catalog name of the table.
+    #[clap(long, default_value = DEFAULT_CATALOG_NAME)]
+    catalog_name: String,
+
+    #[clap(flatten)]
+    store: StoreConfig,
+}
+
+impl DelTableCommand {
+    fn validate(&self) -> Result<(), BoxedError> {
+        if matches!(
+            (&self.table_id, &self.table_name),
+            (Some(_), Some(_)) | (None, None)
+        ) {
+            return Err(BoxedError::new(
+                InvalidArgumentsSnafu {
+                    msg: "You must specify either --table-id or --table-name.",
+                }
+                .build(),
+            ));
+        }
+        Ok(())
+    }
+}
+
+impl DelTableCommand {
+    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
+        self.validate()?;
+        let kv_backend = self.store.build().await?;
+        Ok(Box::new(DelTableTool {
+            table_id: self.table_id,
+            table_name: self.table_name.clone(),
+            schema_name: self.schema_name.clone(),
+            catalog_name: self.catalog_name.clone(),
+            table_name_manager: TableNameManager::new(kv_backend.clone()),
+            table_metadata_deleter: TableMetadataDeleter::new(kv_backend),
+        }))
+    }
+}
+
+struct DelTableTool {
+    table_id: Option<u32>,
+    table_name: Option<String>,
+    schema_name: String,
+    catalog_name: String,
+    table_name_manager: TableNameManager,
+    table_metadata_deleter: TableMetadataDeleter,
+}
+
+#[async_trait]
+impl Tool for DelTableTool {
+    async fn do_work(&self) -> Result<(), BoxedError> {
+        let table_id = if let Some(table_name) = &self.table_name {
+            let catalog_name = &self.catalog_name;
+            let schema_name = &self.schema_name;
+
+            let Some(table_id) = get_table_id_by_name(
+                &self.table_name_manager,
+                catalog_name,
+                schema_name,
+                table_name,
+            )
+            .await?
+            else {
+                println!(
+                    "Table({}) not found",
+                    format_full_table_name(catalog_name, schema_name, table_name)
+                );
+                return Ok(());
+            };
+            table_id
+        } else {
+            // Safety: `validate` ensures exactly one of table_id/table_name is set, and table_name is None here.
+            self.table_id.unwrap()
+        };
+        self.table_metadata_deleter.delete(table_id).await?;
+        println!("Table({}) deleted", table_id);
+
+        Ok(())
+    }
+}
+
+struct TableMetadataDeleter {
+    table_metadata_manager: TableMetadataManager,
+}
+
+impl TableMetadataDeleter {
+    fn new(kv_backend: KvBackendRef) -> Self {
+        Self {
+            table_metadata_manager: TableMetadataManager::new_with_custom_tombstone_prefix(
+                kv_backend,
+                CLI_TOMBSTONE_PREFIX,
+            ),
+        }
+    }
+
+    async fn delete(&self, table_id: TableId) -> Result<(), BoxedError> {
+        let (table_info, table_route) = self
+            .table_metadata_manager
+            .get_full_table_info(table_id)
+            .await
+            .map_err(BoxedError::new)?;
+        let Some(table_info) = table_info else {
+            return Err(BoxedError::new(TableNotFoundSnafu { table_id }.build()));
+        };
+        let Some(table_route) = table_route else {
+            return Err(BoxedError::new(TableNotFoundSnafu { table_id }.build()));
+        };
+        let physical_table_id = self
+            .table_metadata_manager
+            .table_route_manager()
+            .get_physical_table_id(table_id)
+            .await
+            .map_err(BoxedError::new)?;
+
+        let table_name = table_info.table_name();
+        let region_wal_options = get_region_wal_options(
+            &self.table_metadata_manager,
+            &table_route,
+            physical_table_id,
+        )
+        .await
+        .map_err(BoxedError::new)?;
+
+        self.table_metadata_manager
+            .delete_table_metadata(table_id, &table_name, &table_route, &region_wal_options)
+            .await
+            .map_err(BoxedError::new)?;
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+    use std::sync::Arc;
+
+    use common_error::ext::ErrorExt;
+    use common_error::status_code::StatusCode;
+    use common_meta::key::table_route::TableRouteValue;
+    use common_meta::key::TableMetadataManager;
+    use common_meta::kv_backend::chroot::ChrootKvBackend;
+    use common_meta::kv_backend::memory::MemoryKvBackend;
+    use common_meta::kv_backend::{KvBackend, KvBackendRef};
+    use common_meta::rpc::store::RangeRequest;
+
+    use crate::metadata::control::del::table::TableMetadataDeleter;
+    use crate::metadata::control::del::CLI_TOMBSTONE_PREFIX;
+    use crate::metadata::control::test_utils::prepare_physical_table_metadata;
+
+    #[tokio::test]
+    async fn test_delete_table_not_found() {
+        let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
+
+        let table_metadata_deleter = TableMetadataDeleter::new(kv_backend);
+        let table_id = 1;
+        let err = table_metadata_deleter.delete(table_id).await.unwrap_err();
+        assert_eq!(err.status_code(), StatusCode::TableNotFound);
+    }
+
+    #[tokio::test]
+    async fn test_delete_table_metadata() {
+        let kv_backend = Arc::new(MemoryKvBackend::new());
+        let table_metadata_manager = TableMetadataManager::new(kv_backend.clone());
+        let table_id = 1024;
+        let (table_info, table_route) = prepare_physical_table_metadata("my_table", table_id).await;
+        table_metadata_manager
+            .create_table_metadata(
+                table_info,
+                TableRouteValue::Physical(table_route),
+                HashMap::new(),
+            )
+            .await
+            .unwrap();
+
+        let total_keys = kv_backend.len();
+        assert!(total_keys > 0);
+
+        let table_metadata_deleter = TableMetadataDeleter::new(kv_backend.clone());
+        table_metadata_deleter.delete(table_id).await.unwrap();
+
+        // Check that the tombstone prefix now holds as many keys as the table metadata had.
+        let chroot =
+            ChrootKvBackend::new(CLI_TOMBSTONE_PREFIX.as_bytes().to_vec(), kv_backend.clone());
+        let req = RangeRequest::default().with_range(vec![0], vec![0]);
+        let resp = chroot.range(req).await.unwrap();
+        assert_eq!(resp.kvs.len(), total_keys);
+    }
+}
--- a/src/cli/src/metadata/control/get.rs
+++ b/src/cli/src/metadata/control/get.rs
@@ -20,7 +20,6 @@ use client::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use common_catalog::format_full_table_name;
 use common_error::ext::BoxedError;
 use common_meta::key::table_info::TableInfoKey;
-use common_meta::key::table_name::TableNameKey;
 use common_meta::key::table_route::TableRouteKey;
 use common_meta::key::TableMetadataManager;
 use common_meta::kv_backend::KvBackendRef;
@@ -30,10 +29,10 @@ use futures::TryStreamExt;

 use crate::error::InvalidArgumentsSnafu;
 use crate::metadata::common::StoreConfig;
-use crate::metadata::control::utils::{decode_key_value, json_fromatter};
+use crate::metadata::control::utils::{decode_key_value, get_table_id_by_name, json_fromatter};
 use crate::Tool;

-/// Subcommand for get command.
+/// Getting metadata from metadata store.
 #[derive(Subcommand)]
 pub enum GetCommand {
    Key(GetKeyCommand),
@@ -52,7 +51,7 @@ impl GetCommand {
 /// Get key-value pairs from the metadata store.
 #[derive(Debug, Default, Parser)]
 pub struct GetKeyCommand {
-    /// The key to get from the metadata store. If empty, returns all key-value pairs.
+    /// The key to get from the metadata store.
    #[clap(default_value = "")]
    key: String,

@@ -130,8 +129,12 @@ pub struct GetTableCommand {
    table_name: Option<String>,

    /// The schema name of the table.
-    #[clap(long)]
-    schema_name: Option<String>,
+    #[clap(long, default_value = DEFAULT_SCHEMA_NAME)]
+    schema_name: String,
+
+    /// The catalog name of the table.
+    #[clap(long, default_value = DEFAULT_CATALOG_NAME)]
+    catalog_name: String,

    /// Pretty print the output.
    #[clap(long, default_value = "false")]
@@ -143,7 +146,10 @@ pub struct GetTableCommand {

 impl GetTableCommand {
    pub fn validate(&self) -> Result<(), BoxedError> {
-        if self.table_id.is_none() && self.table_name.is_none() {
+        if matches!(
+            (&self.table_id, &self.table_name),
+            (Some(_), Some(_)) | (None, None)
+        ) {
            return Err(BoxedError::new(
                InvalidArgumentsSnafu {
                    msg: "You must specify either --table-id or --table-name.",
@@ -159,7 +165,8 @@ struct GetTableTool {
    kvbackend: KvBackendRef,
    table_id: Option<u32>,
    table_name: Option<String>,
-    schema_name: Option<String>,
+    schema_name: String,
+    catalog_name: String,
    pretty: bool,
 }

@@ -172,23 +179,20 @@ impl Tool for GetTableTool {
        let table_route_manager = table_metadata_manager.table_route_manager();

        let table_id = if let Some(table_name) = &self.table_name {
-            let catalog = DEFAULT_CATALOG_NAME.to_string();
-            let schema_name = self
-                .schema_name
-                .clone()
-                .unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());
-            let key = TableNameKey::new(&catalog, &schema_name, table_name);
+            let catalog_name = &self.catalog_name;
+            let schema_name = &self.schema_name;

-            let Some(table_name) = table_name_manager.get(key).await.map_err(BoxedError::new)?
+            let Some(table_id) =
+                get_table_id_by_name(table_name_manager, catalog_name, schema_name, table_name)
+                    .await?
            else {
                println!(
                    "Table({}) not found",
-                    format_full_table_name(&catalog, &schema_name, table_name)
+                    format_full_table_name(catalog_name, schema_name, table_name)
                );
                return Ok(());
            };
-
-            table_name.table_id()
+            table_id
        } else {
            // Safety: we have validated that table_id or table_name is not None
            self.table_id.unwrap()
@@ -236,6 +240,7 @@ impl GetTableCommand {
            table_id: self.table_id,
            table_name: self.table_name.clone(),
            schema_name: self.schema_name.clone(),
+            catalog_name: self.catalog_name.clone(),
            pretty: self.pretty,
        }))
    }
--- a/src/cli/src/metadata/control/test_utils.rs
+++ b/src/cli/src/metadata/control/test_utils.rs
@@ -0,0 +1,51 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use common_meta::ddl::test_util::test_create_physical_table_task;
+use common_meta::key::table_route::PhysicalTableRouteValue;
+use common_meta::kv_backend::KvBackendRef;
+use common_meta::peer::Peer;
+use common_meta::rpc::router::{Region, RegionRoute};
+use common_meta::rpc::store::PutRequest;
+use store_api::storage::{RegionId, TableId};
+use table::metadata::RawTableInfo;
+
+/// Puts a key-value pair into the kv backend.
+pub async fn put_key(kv_backend: &KvBackendRef, key: &str, value: &str) {
+    let put_req = PutRequest::new()
+        .with_key(key.as_bytes())
+        .with_value(value.as_bytes());
+    kv_backend.put(put_req).await.unwrap();
+}
+
+/// Prepares the physical table metadata for testing.
+///
+/// Returns the table info and the table route.
+pub async fn prepare_physical_table_metadata(
+    table_name: &str,
+    table_id: TableId,
+) -> (RawTableInfo, PhysicalTableRouteValue) {
+    let mut create_physical_table_task = test_create_physical_table_task(table_name);
+    let table_route = PhysicalTableRouteValue::new(vec![RegionRoute {
+        region: Region {
+            id: RegionId::new(table_id, 1),
+            ..Default::default()
+        },
+        leader_peer: Some(Peer::empty(1)),
+        ..Default::default()
+    }]);
+    create_physical_table_task.set_table_id(table_id);
+
+    (create_physical_table_task.table_info, table_route)
+}
--- a/src/cli/src/metadata/control/utils.rs
+++ b/src/cli/src/metadata/control/utils.rs
@@ -12,9 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use common_error::ext::BoxedError;
 use common_meta::error::Result as CommonMetaResult;
+use common_meta::key::table_name::{TableNameKey, TableNameManager};
 use common_meta::rpc::KeyValue;
 use serde::Serialize;
+use store_api::storage::TableId;

 /// Decodes a key-value pair into a string.
 pub fn decode_key_value(kv: KeyValue) -> CommonMetaResult<(String, String)> {
@@ -34,3 +37,21 @@ where
        serde_json::to_string(value).unwrap()
    }
 }
+
+/// Gets the table id by table name.
+pub async fn get_table_id_by_name(
+    table_name_manager: &TableNameManager,
+    catalog_name: &str,
+    schema_name: &str,
+    table_name: &str,
+) -> Result<Option<TableId>, BoxedError> {
+    let table_name_key = TableNameKey::new(catalog_name, schema_name, table_name);
+    let Some(table_name_value) = table_name_manager
+        .get(table_name_key)
+        .await
+        .map_err(BoxedError::new)?
+    else {
+        return Ok(None);
+    };
+    Ok(Some(table_name_value.table_id()))
+}
--- a/src/cli/src/metadata/repair.rs
+++ b/src/cli/src/metadata/repair.rs
@@ -0,0 +1,368 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod alter_table;
+mod create_table;
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use async_trait::async_trait;
+use clap::Parser;
+use client::api::v1::CreateTableExpr;
+use client::client_manager::NodeClients;
+use client::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+use common_error::ext::{BoxedError, ErrorExt};
+use common_error::status_code::StatusCode;
+use common_grpc::channel_manager::ChannelConfig;
+use common_meta::error::Error as CommonMetaError;
+use common_meta::key::TableMetadataManager;
+use common_meta::kv_backend::KvBackendRef;
+use common_meta::node_manager::NodeManagerRef;
+use common_meta::peer::Peer;
+use common_meta::rpc::router::{find_leaders, RegionRoute};
+use common_telemetry::{error, info, warn};
+use futures::TryStreamExt;
+use snafu::{ensure, ResultExt};
+use store_api::storage::TableId;
+
+use crate::error::{
+    InvalidArgumentsSnafu, Result, SendRequestToDatanodeSnafu, TableMetadataSnafu, UnexpectedSnafu,
+};
+use crate::metadata::common::StoreConfig;
+use crate::metadata::utils::{FullTableMetadata, IteratorInput, TableMetadataIterator};
+use crate::Tool;
+
+/// Repair metadata of logical tables.
+#[derive(Debug, Default, Parser)]
+pub struct RepairLogicalTablesCommand {
+    /// The names of the tables to repair.
+    #[clap(long, value_delimiter = ',', alias = "table-name")]
+    table_names: Vec<String>,
+
+    /// The ids of the tables to repair.
+    #[clap(long, value_delimiter = ',', alias = "table-id")]
+    table_ids: Vec<TableId>,
+
+    /// The schema of the tables to repair.
+    #[clap(long, default_value = DEFAULT_SCHEMA_NAME)]
+    schema_name: String,
+
+    /// The catalog of the tables to repair.
+    #[clap(long, default_value = DEFAULT_CATALOG_NAME)]
+    catalog_name: String,
+
+    /// Whether to fail fast if any repair operation fails.
+    #[clap(long)]
+    fail_fast: bool,
+
+    #[clap(flatten)]
+    store: StoreConfig,
+
+    /// The timeout for the client to operate the datanode.
+    #[clap(long, default_value_t = 30)]
+    client_timeout_secs: u64,
+
+    /// The timeout for the client to connect to the datanode.
+    #[clap(long, default_value_t = 3)]
+    client_connect_timeout_secs: u64,
+}
+
+impl RepairLogicalTablesCommand {
+    fn validate(&self) -> Result<()> {
+        ensure!(
+            !self.table_names.is_empty() || !self.table_ids.is_empty(),
+            InvalidArgumentsSnafu {
+                msg: "You must specify --table-names or --table-ids.",
+            }
+        );
+        Ok(())
+    }
+}
+
+impl RepairLogicalTablesCommand {
+    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
+        self.validate().map_err(BoxedError::new)?;
+        let kv_backend = self.store.build().await?;
+        let node_client_channel_config = ChannelConfig::new()
+            .timeout(Duration::from_secs(self.client_timeout_secs))
+            .connect_timeout(Duration::from_secs(self.client_connect_timeout_secs));
+        let node_manager = Arc::new(NodeClients::new(node_client_channel_config));
+
+        Ok(Box::new(RepairTool {
+            table_names: self.table_names.clone(),
+            table_ids: self.table_ids.clone(),
+            schema_name: self.schema_name.clone(),
+            catalog_name: self.catalog_name.clone(),
+            fail_fast: self.fail_fast,
+            kv_backend,
+            node_manager,
+        }))
+    }
+}
+
+struct RepairTool {
+    table_names: Vec<String>,
+    table_ids: Vec<TableId>,
+    schema_name: String,
+    catalog_name: String,
+    fail_fast: bool,
+    kv_backend: KvBackendRef,
+    node_manager: NodeManagerRef,
+}
+
+#[async_trait]
+impl Tool for RepairTool {
+    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
+        self.repair_tables().await.map_err(BoxedError::new)
+    }
+}
+
+impl RepairTool {
+    /// Builds the [`IteratorInput`] from the CLI arguments.
+    ///
+    /// Table names take precedence over table ids when both are given; each name is paired
+    /// with the configured catalog and schema. Fails if neither list is populated.
+    fn generate_iterator_input(&self) -> Result<IteratorInput> {
+        if !self.table_names.is_empty() {
+            let table_names = self
+                .table_names
+                .iter()
+                .map(|table_name| {
+                    (
+                        self.catalog_name.clone(),
+                        self.schema_name.clone(),
+                        table_name.clone(),
+                    )
+                })
+                .collect::<Vec<_>>();
+            return Ok(IteratorInput::new_table_names(table_names));
+        } else if !self.table_ids.is_empty() {
+            return Ok(IteratorInput::new_table_ids(self.table_ids.clone()));
+        }
+        InvalidArgumentsSnafu {
+            msg: "You must specify --table-names or --table-ids.",
+        }
+        .fail()
+    }
+
+    async fn repair_tables(&self) -> Result<()> {
+        let input = self.generate_iterator_input()?;
+        let mut table_metadata_iterator =
+            Box::pin(TableMetadataIterator::new(self.kv_backend.clone(), input).into_stream());
+        let table_metadata_manager = TableMetadataManager::new(self.kv_backend.clone());
+
+        let mut skipped_table = 0;
+        let mut success_table = 0;
+        while let Some(full_table_metadata) = table_metadata_iterator.try_next().await? {
+            let full_table_name = full_table_metadata.full_table_name();
+            if !full_table_metadata.is_metric_engine() {
+                warn!(
+                    "Skipping repair for non-metric engine table: {}",
+                    full_table_name
+                );
+                skipped_table += 1;
+                continue;
+            }
+
+            if full_table_metadata.is_physical_table() {
+                warn!("Skipping repair for physical table: {}", full_table_name);
+                skipped_table += 1;
+                continue;
+            }
+
+            let (physical_table_id, physical_table_route) = table_metadata_manager
+                .table_route_manager()
+                .get_physical_table_route(full_table_metadata.table_id)
+                .await
+                .context(TableMetadataSnafu)?;
+
+            if let Err(err) = self
+                .repair_table(
+                    &full_table_metadata,
+                    physical_table_id,
+                    &physical_table_route.region_routes,
+                )
+                .await
+            {
+                error!(
+                    err;
+                    "Failed to repair table: {}, skipped table: {}",
+                    full_table_name,
+                    skipped_table,
+                );
+
+                if self.fail_fast {
+                    return Err(err);
+                }
+            } else {
+                success_table += 1;
+            }
+        }
+
+        info!(
+            "Repair logical tables result: {} tables repaired, {} tables skipped",
+            success_table, skipped_table
+        );
+
+        Ok(())
+    }
+
+    async fn alter_table_on_datanodes(
+        &self,
+        full_table_metadata: &FullTableMetadata,
+        physical_region_routes: &[RegionRoute],
+    ) -> Result<Vec<(Peer, CommonMetaError)>> {
+        let logical_table_id = full_table_metadata.table_id;
+        let alter_table_expr = alter_table::generate_alter_table_expr_for_all_columns(
+            &full_table_metadata.table_info,
+        )?;
+        let node_manager = self.node_manager.clone();
+
+        let mut failed_peers = Vec::new();
+        info!(
+            "Sending alter table requests to all datanodes for table: {}, number of regions:{}.",
+            full_table_metadata.full_table_name(),
+            physical_region_routes.len()
+        );
+        let leaders = find_leaders(physical_region_routes);
+        for peer in &leaders {
+            let alter_table_request = alter_table::make_alter_region_request_for_peer(
+                logical_table_id,
+                &alter_table_expr,
+                peer,
+                physical_region_routes,
+            )?;
+            let datanode = node_manager.datanode(peer).await;
+            if let Err(err) = datanode.handle(alter_table_request).await {
+                failed_peers.push((peer.clone(), err));
+            }
+        }
+
+        Ok(failed_peers)
+    }
+
+    async fn create_table_on_datanode(
+        &self,
+        create_table_expr: &CreateTableExpr,
+        logical_table_id: TableId,
+        physical_table_id: TableId,
+        peer: &Peer,
+        physical_region_routes: &[RegionRoute],
+    ) -> Result<()> {
+        let node_manager = self.node_manager.clone();
+        let datanode = node_manager.datanode(peer).await;
+        let create_table_request = create_table::make_create_region_request_for_peer(
+            logical_table_id,
+            physical_table_id,
+            create_table_expr,
+            peer,
+            physical_region_routes,
+        )?;
+
+        datanode
+            .handle(create_table_request)
+            .await
+            .with_context(|_| SendRequestToDatanodeSnafu { peer: peer.clone() })?;
+
+        Ok(())
+    }
+
+    async fn repair_table(
+        &self,
+        full_table_metadata: &FullTableMetadata,
+        physical_table_id: TableId,
+        physical_region_routes: &[RegionRoute],
+    ) -> Result<()> {
+        let full_table_name = full_table_metadata.full_table_name();
+        // First, we send alter table requests to all datanodes with all columns.
+        let failed_peers = self
+            .alter_table_on_datanodes(full_table_metadata, physical_region_routes)
+            .await?;
+
+        if failed_peers.is_empty() {
+            info!(
+                "All alter table requests sent successfully for table: {}",
+                full_table_name
+            );
+            return Ok(());
+        }
+        warn!(
+            "Sending alter table requests to datanodes for table: {} failed for the datanodes: {:?}",
+            full_table_name,
+            failed_peers.iter().map(|(peer, _)| peer.id).collect::<Vec<_>>()
+        );
+
+        let create_table_expr =
+            create_table::generate_create_table_expr(&full_table_metadata.table_info)?;
+
+        let mut errors = Vec::new();
+        for (peer, err) in failed_peers {
+            if err.status_code() != StatusCode::RegionNotFound {
+                error!(
+                    err;
+                    "Sending alter table requests to datanode: {} for table: {} failed",
+                    peer.id,
+                    full_table_name,
+                );
+                continue;
+            }
+            info!(
+                "Region not found for table: {}, datanode: {}, trying to create the logical table on that datanode",
+                full_table_name,
+                peer.id
+            );
+
+            // If the alter table request fails with `RegionNotFound` on a datanode, we attempt to create the table
+            // on that datanode as a fallback mechanism to ensure table consistency across the cluster.
+            if let Err(err) = self
+                .create_table_on_datanode(
+                    &create_table_expr,
+                    full_table_metadata.table_id,
+                    physical_table_id,
+                    &peer,
+                    physical_region_routes,
+                )
+                .await
+            {
+                error!(
+                    err;
+                    "Failed to create table on datanode: {} for table: {}",
+                    peer.id, full_table_name
+                );
+                errors.push(err);
+                if self.fail_fast {
+                    break;
+                }
+            } else {
+                info!(
+                    "Created table on datanode: {} for table: {}",
+                    peer.id, full_table_name
+                );
+            }
+        }
+
+        if !errors.is_empty() {
+            return UnexpectedSnafu {
+                msg: format!(
+                    "Failed to create table on datanodes for table: {}",
+                    full_table_name,
+                ),
+            }
+            .fail();
+        }
+
+        Ok(())
+    }
+}
--- a/src/cli/src/metadata/repair/alter_table.rs
+++ b/src/cli/src/metadata/repair/alter_table.rs
@@ -0,0 +1,84 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use client::api::v1::alter_table_expr::Kind;
+use client::api::v1::region::{region_request, AlterRequests, RegionRequest, RegionRequestHeader};
+use client::api::v1::{AddColumn, AddColumns, AlterTableExpr};
+use common_meta::ddl::alter_logical_tables::make_alter_region_request;
+use common_meta::peer::Peer;
+use common_meta::rpc::router::{find_leader_regions, RegionRoute};
+use operator::expr_helper::column_schemas_to_defs;
+use snafu::ResultExt;
+use store_api::storage::{RegionId, TableId};
+use table::metadata::RawTableInfo;
+
+use crate::error::{CovertColumnSchemasToDefsSnafu, Result};
+
+/// Builds an [`AlterTableExpr`] that adds every column of the given table.
+///
+/// Each column is emitted with `add_if_not_exists: true` and no explicit location.
+///
+/// # Errors
+///
+/// Returns an error if the column schemas cannot be converted to column definitions.
+pub fn generate_alter_table_expr_for_all_columns(
+    table_info: &RawTableInfo,
+) -> Result<AlterTableExpr> {
+    let column_schemas = &table_info.meta.schema.column_schemas;
+
+    // Resolve the primary key indices into column names.
+    let primary_keys: Vec<_> = table_info
+        .meta
+        .primary_key_indices
+        .iter()
+        .map(|idx| column_schemas[*idx].name.clone())
+        .collect();
+
+    let column_defs = column_schemas_to_defs(column_schemas.clone(), &primary_keys)
+        .context(CovertColumnSchemasToDefsSnafu)?;
+
+    let add_columns = column_defs
+        .into_iter()
+        .map(|column_def| AddColumn {
+            column_def: Some(column_def),
+            location: None,
+            add_if_not_exists: true,
+        })
+        .collect();
+
+    Ok(AlterTableExpr {
+        catalog_name: table_info.catalog_name.to_string(),
+        schema_name: table_info.schema_name.to_string(),
+        table_name: table_info.name.to_string(),
+        kind: Some(Kind::AddColumns(AddColumns { add_columns })),
+        ..Default::default()
+    })
+}
+
+/// Builds the alter [`RegionRequest`] to send to `peer`.
+///
+/// One alter request is produced for every region number that `find_leader_regions`
+/// returns for `peer` in `region_routes`; each region id is formed from
+/// `logical_table_id` and that region number.
+pub fn make_alter_region_request_for_peer(
+    logical_table_id: TableId,
+    alter_table_expr: &AlterTableExpr,
+    peer: &Peer,
+    region_routes: &[RegionRoute],
+) -> Result<RegionRequest> {
+    let requests = find_leader_regions(region_routes, peer)
+        .into_iter()
+        .map(|region_number| {
+            let region_id = RegionId::new(logical_table_id, region_number);
+            make_alter_region_request(region_id, alter_table_expr)
+        })
+        .collect();
+
+    let body = region_request::Body::Alters(AlterRequests { requests });
+    Ok(RegionRequest {
+        header: Some(RegionRequestHeader::default()),
+        body: Some(body),
+    })
+}
--- a/src/cli/src/metadata/repair/create_table.rs
+++ b/src/cli/src/metadata/repair/create_table.rs
@@ -0,0 +1,89 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use client::api::v1::region::{region_request, CreateRequests, RegionRequest, RegionRequestHeader};
+use client::api::v1::CreateTableExpr;
+use common_meta::ddl::create_logical_tables::create_region_request_builder;
+use common_meta::ddl::utils::region_storage_path;
+use common_meta::peer::Peer;
+use common_meta::rpc::router::{find_leader_regions, RegionRoute};
+use operator::expr_helper::column_schemas_to_defs;
+use snafu::ResultExt;
+use store_api::storage::{RegionId, TableId};
+use table::metadata::RawTableInfo;
+
+use crate::error::{CovertColumnSchemasToDefsSnafu, Result};
+
+/// Generates a `CreateTableExpr` from a `RawTableInfo`.
+///
+/// The expression is built with `create_if_not_exists: true`, an empty description and
+/// no explicit table id; names, engine and options are copied from `table_info`.
+///
+/// # Errors
+///
+/// Returns an error if the schema has no time index, or if the column schemas cannot
+/// be converted to column definitions.
+pub fn generate_create_table_expr(table_info: &RawTableInfo) -> Result<CreateTableExpr> {
+    let schema = &table_info.meta.schema;
+    let primary_keys = table_info
+        .meta
+        .primary_key_indices
+        .iter()
+        .map(|i| schema.column_schemas[*i].name.clone())
+        .collect::<Vec<_>>();
+
+    // Report a missing time index as an error instead of panicking: this function
+    // already returns `Result`, so callers can handle the failure.
+    let Some(timestamp_index) = schema.timestamp_index.as_ref() else {
+        return crate::error::UnexpectedSnafu {
+            msg: format!("Time index not found for table: {}", table_info.name),
+        }
+        .fail();
+    };
+    let time_index = schema.column_schemas[*timestamp_index].name.clone();
+    let column_defs = column_schemas_to_defs(schema.column_schemas.clone(), &primary_keys)
+        .context(CovertColumnSchemasToDefsSnafu)?;
+    let table_options = HashMap::from(&table_info.meta.options);
+
+    Ok(CreateTableExpr {
+        catalog_name: table_info.catalog_name.to_string(),
+        schema_name: table_info.schema_name.to_string(),
+        table_name: table_info.name.to_string(),
+        desc: String::default(),
+        column_defs,
+        time_index,
+        primary_keys,
+        create_if_not_exists: true,
+        table_options,
+        table_id: None,
+        engine: table_info.meta.engine.to_string(),
+    })
+}
+
+/// Makes a create region request for a peer.
+///
+/// One create request is produced for every region number that `find_leader_regions`
+/// returns for `peer` in `region_routes`; each region id is formed from
+/// `logical_table_id` and that region number. The storage path is derived from the
+/// catalog and schema names of `create_table_expr`.
+///
+/// # Errors
+///
+/// Returns an error if the region request builder cannot be created from
+/// `create_table_expr`.
+pub fn make_create_region_request_for_peer(
+    logical_table_id: TableId,
+    physical_table_id: TableId,
+    create_table_expr: &CreateTableExpr,
+    peer: &Peer,
+    region_routes: &[RegionRoute],
+) -> Result<RegionRequest> {
+    // Propagate a builder failure instead of panicking: this function already
+    // returns `Result`, so callers can handle the failure.
+    let request_builder = create_region_request_builder(create_table_expr, physical_table_id)
+        .map_err(|err| {
+            crate::error::UnexpectedSnafu {
+                msg: format!("Failed to build create region request: {err:?}"),
+            }
+            .build()
+        })?;
+
+    let catalog = &create_table_expr.catalog_name;
+    let schema = &create_table_expr.schema_name;
+    let storage_path = region_storage_path(catalog, schema);
+
+    let regions_on_this_peer = find_leader_regions(region_routes, peer);
+    let mut requests = Vec::with_capacity(regions_on_this_peer.len());
+    for region_number in &regions_on_this_peer {
+        let region_id = RegionId::new(logical_table_id, *region_number);
+        let region_request =
+            request_builder.build_one(region_id, storage_path.clone(), &HashMap::new());
+        requests.push(region_request);
+    }
+
+    Ok(RegionRequest {
+        header: Some(RegionRequestHeader::default()),
+        body: Some(region_request::Body::Creates(CreateRequests { requests })),
+    })
+}
--- a/src/cli/src/metadata/snapshot.rs
+++ b/src/cli/src/metadata/snapshot.rs
@@ -301,7 +301,6 @@ struct MetaInfoTool {

 #[async_trait]
 impl Tool for MetaInfoTool {
-    #[allow(clippy::print_stdout)]
    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
        let result = MetadataSnapshotManager::info(
            &self.inner,
--- a/src/cli/src/metadata/utils.rs
+++ b/src/cli/src/metadata/utils.rs
@@ -0,0 +1,178 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::VecDeque;
+
+use async_stream::try_stream;
+use common_catalog::consts::METRIC_ENGINE;
+use common_catalog::format_full_table_name;
+use common_meta::key::table_name::TableNameKey;
+use common_meta::key::table_route::TableRouteValue;
+use common_meta::key::TableMetadataManager;
+use common_meta::kv_backend::KvBackendRef;
+use futures::Stream;
+use snafu::{OptionExt, ResultExt};
+use store_api::storage::TableId;
+use table::metadata::RawTableInfo;
+
+use crate::error::{Result, TableMetadataSnafu, UnexpectedSnafu};
+
+/// The set of tables a [`TableMetadataIterator`] walks over.
+pub enum IteratorInput {
+    /// Tables identified by id.
+    TableIds(VecDeque<TableId>),
+    /// Tables identified by `(catalog, schema, table)` name triples.
+    TableNames(VecDeque<(String, String, String)>),
+}
+
+impl IteratorInput {
+    /// Builds an input that iterates over the given table ids, in order.
+    pub fn new_table_ids(table_ids: Vec<TableId>) -> Self {
+        Self::TableIds(VecDeque::from(table_ids))
+    }
+
+    /// Builds an input that iterates over the given table names, in order.
+    pub fn new_table_names(table_names: Vec<(String, String, String)>) -> Self {
+        Self::TableNames(VecDeque::from(table_names))
+    }
+}
+
+/// An iterator for retrieving table metadata from the metadata store.
+///
+/// This struct provides functionality to iterate over table metadata based on
+/// either [`TableId`] and their associated regions or fully qualified table names.
+pub struct TableMetadataIterator {
+    /// Remaining table ids or table names to resolve.
+    input: IteratorInput,
+    /// Used to look up table names, table info and table routes.
+    table_metadata_manager: TableMetadataManager,
+}
+
+/// Everything the metadata store holds for a single table.
+pub struct FullTableMetadata {
+    /// The id of the table.
+    pub table_id: TableId,
+    /// The raw table info.
+    pub table_info: RawTableInfo,
+    /// The table route.
+    pub table_route: TableRouteValue,
+}
+
+impl FullTableMetadata {
+    /// Returns true if the table route is [TableRouteValue::Physical].
+    pub fn is_physical_table(&self) -> bool {
+        let route = &self.table_route;
+        route.is_physical()
+    }
+
+    /// Returns true if the table's engine is the metric engine.
+    pub fn is_metric_engine(&self) -> bool {
+        let engine = &self.table_info.meta.engine;
+        engine == METRIC_ENGINE
+    }
+
+    /// Returns the full table name built from the catalog, schema and table names.
+    pub fn full_table_name(&self) -> String {
+        let info = &self.table_info;
+        format_full_table_name(&info.catalog_name, &info.schema_name, &info.name)
+    }
+}
+
+impl TableMetadataIterator {
+    /// Creates an iterator over `input`, backed by a [`TableMetadataManager`] built on `kvbackend`.
+    pub fn new(kvbackend: KvBackendRef, input: IteratorInput) -> Self {
+        Self {
+            input,
+            table_metadata_manager: TableMetadataManager::new(kvbackend),
+        }
+    }
+
+    /// Returns the metadata of the next table, or `None` once the input is exhausted.
+    ///
+    /// - `TableIds`: takes the next [`TableId`] and loads its metadata.
+    /// - `TableNames`: takes the next `(catalog, schema, table)` triple, resolves it to a
+    ///   [`TableId`], then loads its metadata.
+    ///
+    /// An entry is removed from the queue before it is looked up, so an entry whose
+    /// lookup fails is not visited again by a later call.
+    pub async fn next(&mut self) -> Result<Option<FullTableMetadata>> {
+        let table_id = match &mut self.input {
+            IteratorInput::TableIds(ids) => {
+                let Some(id) = ids.pop_front() else {
+                    return Ok(None);
+                };
+                id
+            }
+            IteratorInput::TableNames(names) => {
+                let Some(name) = names.pop_front() else {
+                    return Ok(None);
+                };
+                self.get_table_id_by_name(name).await?
+            }
+        };
+
+        let metadata = self.get_table_metadata(table_id).await?;
+        Ok(Some(metadata))
+    }
+
+    /// Consumes the iterator and exposes it as a stream of table metadata.
+    pub fn into_stream(mut self) -> impl Stream<Item = Result<FullTableMetadata>> {
+        try_stream!({
+            while let Some(metadata) = self.next().await? {
+                yield metadata;
+            }
+        })
+    }
+
+    /// Resolves a `(catalog, schema, table)` triple to its [`TableId`].
+    ///
+    /// Fails with an `Unexpected` error if no table is registered under that name.
+    async fn get_table_id_by_name(
+        &mut self,
+        (catalog_name, schema_name, table_name): (String, String, String),
+    ) -> Result<TableId> {
+        let key = TableNameKey::new(&catalog_name, &schema_name, &table_name);
+        let found = self
+            .table_metadata_manager
+            .table_name_manager()
+            .get(key)
+            .await
+            .context(TableMetadataSnafu)?;
+
+        let table_name_value = found.with_context(|| UnexpectedSnafu {
+            msg: format!(
+                "Table not found: {}",
+                format_full_table_name(&catalog_name, &schema_name, &table_name)
+            ),
+        })?;
+
+        Ok(table_name_value.table_id())
+    }
+
+    /// Loads the table info and table route of `table_id`.
+    ///
+    /// Fails with an `Unexpected` error if either of them is missing.
+    async fn get_table_metadata(&mut self, table_id: TableId) -> Result<FullTableMetadata> {
+        let (info_value, route_value) = self
+            .table_metadata_manager
+            .get_full_table_info(table_id)
+            .await
+            .context(TableMetadataSnafu)?;
+
+        // The table info is checked before the table route, so a table missing both
+        // reports the missing table info.
+        let info_value = info_value.with_context(|| UnexpectedSnafu {
+            msg: format!("Table info not found for table id: {table_id}"),
+        })?;
+        let table_info = info_value.into_inner().table_info;
+
+        let route_value = route_value.with_context(|| UnexpectedSnafu {
+            msg: format!("Table route not found for table id: {table_id}"),
+        })?;
+        let table_route = route_value.into_inner();
+
+        Ok(FullTableMetadata {
+            table_id,
+            table_info,
+            table_route,
+        })
+    }
+}
--- a/src/client/src/database.rs
+++ b/src/client/src/database.rs
@@ -23,7 +23,7 @@ use api::v1::greptime_request::Request;
 use api::v1::query_request::Query;
 use api::v1::{
    AlterTableExpr, AuthHeader, Basic, CreateTableExpr, DdlRequest, GreptimeRequest,
-    InsertRequests, QueryRequest, RequestHeader,
+    InsertRequests, QueryRequest, RequestHeader, RowInsertRequests,
 };
 use arrow_flight::{FlightData, Ticket};
 use async_stream::stream;
@@ -31,7 +31,7 @@ use base64::prelude::BASE64_STANDARD;
 use base64::Engine;
 use common_catalog::build_db_string;
 use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
-use common_error::ext::{BoxedError, ErrorExt};
+use common_error::ext::BoxedError;
 use common_grpc::flight::do_put::DoPutResponse;
 use common_grpc::flight::{FlightDecoder, FlightMessage};
 use common_query::Output;
@@ -42,13 +42,13 @@ use common_telemetry::{error, warn};
 use futures::future;
 use futures_util::{Stream, StreamExt, TryStreamExt};
 use prost::Message;
-use snafu::{ensure, ResultExt};
+use snafu::{ensure, OptionExt, ResultExt};
 use tonic::metadata::{AsciiMetadataKey, AsciiMetadataValue, MetadataMap, MetadataValue};
 use tonic::transport::Channel;

 use crate::error::{
    ConvertFlightDataSnafu, Error, FlightGetSnafu, IllegalFlightMessagesSnafu,
-    InvalidTonicMetadataValueSnafu, ServerSnafu,
+    InvalidTonicMetadataValueSnafu,
 };
 use crate::{error, from_grpc_response, Client, Result};

@@ -118,6 +118,7 @@ impl Database {
        }
    }

+    /// Set the catalog for the database client.
    pub fn set_catalog(&mut self, catalog: impl Into<String>) {
        self.catalog = catalog.into();
    }
@@ -130,6 +131,7 @@ impl Database {
        }
    }

+    /// Set the schema for the database client.
    pub fn set_schema(&mut self, schema: impl Into<String>) {
        self.schema = schema.into();
    }
@@ -142,20 +144,24 @@ impl Database {
        }
    }

+    /// Set the timezone for the database client.
    pub fn set_timezone(&mut self, timezone: impl Into<String>) {
        self.timezone = timezone.into();
    }

+    /// Set the auth scheme for the database client.
    pub fn set_auth(&mut self, auth: AuthScheme) {
        self.ctx.auth_header = Some(AuthHeader {
            auth_scheme: Some(auth),
        });
    }

+    /// Make an InsertRequests request to the database.
    pub async fn insert(&self, requests: InsertRequests) -> Result<u32> {
        self.handle(Request::Inserts(requests)).await
    }

+    /// Make an InsertRequests request to the database with hints.
    pub async fn insert_with_hints(
        &self,
        requests: InsertRequests,
@@ -172,6 +178,28 @@ impl Database {
        from_grpc_response(response)
    }

+    /// Make a RowInsertRequests request to the database.
+    ///
+    /// Wraps `requests` in [`Request::RowInserts`] and forwards it to [`Self::handle`].
+    pub async fn row_inserts(&self, requests: RowInsertRequests) -> Result<u32> {
+        self.handle(Request::RowInserts(requests)).await
+    }
+
+    /// Sends `requests` as a [`Request::RowInserts`] with `hints` attached to the
+    /// request metadata via [`Self::put_hints`].
+    pub async fn row_inserts_with_hints(
+        &self,
+        requests: RowInsertRequests,
+        hints: &[(&str, &str)],
+    ) -> Result<u32> {
+        let mut client = make_database_client(&self.client)?.inner;
+
+        let rpc_request = self.to_rpc_request(Request::RowInserts(requests));
+        let mut tonic_request = tonic::Request::new(rpc_request);
+        Self::put_hints(tonic_request.metadata_mut(), hints)?;
+
+        let response = client.handle(tonic_request).await?;
+        from_grpc_response(response.into_inner())
+    }
+
    fn put_hints(metadata: &mut MetadataMap, hints: &[(&str, &str)]) -> Result<()> {
        let Some(value) = hints
            .iter()
@@ -187,6 +215,7 @@ impl Database {
        Ok(())
    }

+    /// Make a request to the database.
    pub async fn handle(&self, request: Request) -> Result<u32> {
        let mut client = make_database_client(&self.client)?.inner;
        let request = self.to_rpc_request(request);
@@ -196,12 +225,22 @@ impl Database {

    /// Retry if connection fails, max_retries is the max number of retries, so the total wait time
    /// is `max_retries * GRPC_CONN_TIMEOUT`
-    pub async fn handle_with_retry(&self, request: Request, max_retries: u32) -> Result<u32> {
+    pub async fn handle_with_retry(
+        &self,
+        request: Request,
+        max_retries: u32,
+        hints: &[(&str, &str)],
+    ) -> Result<u32> {
        let mut client = make_database_client(&self.client)?.inner;
        let mut retries = 0;
+
        let request = self.to_rpc_request(request);
+
        loop {
-            let raw_response = client.handle(request.clone()).await;
+            let mut tonic_request = tonic::Request::new(request.clone());
+            let metadata = tonic_request.metadata_mut();
+            Self::put_hints(metadata, hints)?;
+            let raw_response = client.handle(tonic_request).await;
            match (raw_response, retries < max_retries) {
                (Ok(resp), _) => return from_grpc_response(resp.into_inner()),
                (Err(err), true) => {
@@ -211,12 +250,18 @@ impl Database {
                        retries += 1;
                        warn!("Retrying {} times with error = {:?}", retries, err);
                        continue;
+                    } else {
+                        error!(
+                            err; "Failed to send request to grpc handle, retries = {}, not retryable error, aborting",
+                            retries
+                        );
+                        return Err(err.into());
                    }
                }
                (Err(err), false) => {
                    error!(
-                        "Failed to send request to grpc handle after {} retries, error = {:?}",
-                        retries, err
+                        err; "Failed to send request to grpc handle after {} retries",
+                        retries,
                    );
                    return Err(err.into());
                }
@@ -240,6 +285,7 @@ impl Database {
        }
    }

+    /// Executes a SQL query without any hints.
    pub async fn sql<S>(&self, sql: S) -> Result<Output>
    where
        S: AsRef<str>,
@@ -247,6 +293,7 @@ impl Database {
        self.sql_with_hint(sql, &[]).await
    }

+    /// Executes a SQL query with optional hints for query optimization.
    pub async fn sql_with_hint<S>(&self, sql: S, hints: &[(&str, &str)]) -> Result<Output>
    where
        S: AsRef<str>,
@@ -257,6 +304,7 @@ impl Database {
        self.do_get(request, hints).await
    }

+    /// Executes a logical plan directly without SQL parsing.
    pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<Output> {
        let request = Request::Query(QueryRequest {
            query: Some(Query::LogicalPlan(logical_plan)),
@@ -264,6 +312,7 @@ impl Database {
        self.do_get(request, &[]).await
    }

+    /// Creates a new table using the provided table expression.
    pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> {
        let request = Request::Ddl(DdlRequest {
            expr: Some(DdlExpr::CreateTable(expr)),
@@ -271,6 +320,7 @@ impl Database {
        self.do_get(request, &[]).await
    }

+    /// Alters an existing table using the provided alter expression.
    pub async fn alter(&self, expr: AlterTableExpr) -> Result<Output> {
        let request = Request::Ddl(DdlRequest {
            expr: Some(DdlExpr::AlterTable(expr)),
@@ -292,21 +342,16 @@ impl Database {
        let response = client.mut_inner().do_get(request).await.or_else(|e| {
            let tonic_code = e.code();
            let e: Error = e.into();
-            let code = e.status_code();
-            let msg = e.to_string();
-            let error =
-                Err(BoxedError::new(ServerSnafu { code, msg }.build())).with_context(|_| {
-                    FlightGetSnafu {
-                        addr: client.addr().to_string(),
-                        tonic_code,
-                    }
-                });
            error!(
                "Failed to do Flight get, addr: {}, code: {}, source: {:?}",
                client.addr(),
                tonic_code,
-                error
+                e
            );
+            let error = Err(BoxedError::new(e)).with_context(|_| FlightGetSnafu {
+                addr: client.addr().to_string(),
+                tonic_code,
+            });
            error
        })?;

@@ -316,7 +361,10 @@ impl Database {
        let mut flight_message_stream = flight_data_stream.map(move |flight_data| {
            flight_data
                .map_err(Error::from)
-                .and_then(|data| decoder.try_decode(&data).context(ConvertFlightDataSnafu))
+                .and_then(|data| decoder.try_decode(&data).context(ConvertFlightDataSnafu))?
+                .context(IllegalFlightMessagesSnafu {
+                    reason: "none message",
+                })
        });

        let Some(first_flight_message) = flight_message_stream.next().await else {
@@ -436,8 +484,11 @@ mod tests {

    use api::v1::auth_header::AuthScheme;
    use api::v1::{AuthHeader, Basic};
+    use common_error::status_code::StatusCode;
+    use tonic::{Code, Status};

    use super::*;
+    use crate::error::TonicSnafu;

    #[test]
    fn test_flight_ctx() {
@@ -460,4 +511,19 @@ mod tests {
            })
        )
    }
+
+    #[test]
+    fn test_from_tonic_status() {
+        let expected = TonicSnafu {
+            code: StatusCode::Internal,
+            msg: "blabla".to_string(),
+            tonic_code: Code::Internal,
+        }
+        .build();
+
+        let status = Status::new(Code::Internal, "blabla");
+        let actual: Error = status.into();
+
+        assert_eq!(expected.to_string(), actual.to_string());
+    }
 }
--- a/src/client/src/error.rs
+++ b/src/client/src/error.rs
@@ -14,13 +14,13 @@

 use std::any::Any;

+use common_error::define_from_tonic_status;
 use common_error::ext::{BoxedError, ErrorExt};
-use common_error::status_code::{convert_tonic_code_to_status_code, StatusCode};
-use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG};
+use common_error::status_code::StatusCode;
 use common_macro::stack_trace_debug;
 use snafu::{location, Location, Snafu};
 use tonic::metadata::errors::InvalidMetadataValue;
-use tonic::{Code, Status};
+use tonic::Code;

 #[derive(Snafu)]
 #[snafu(visibility(pub))]
@@ -124,6 +124,15 @@ pub enum Error {
        location: Location,
        source: datatypes::error::Error,
    },
+
+    #[snafu(display("{}", msg))]
+    Tonic {
+        code: StatusCode,
+        msg: String,
+        tonic_code: Code,
+        #[snafu(implicit)]
+        location: Location,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -135,7 +144,7 @@ impl ErrorExt for Error {
            | Error::MissingField { .. }
            | Error::IllegalDatabaseResponse { .. } => StatusCode::Internal,

-            Error::Server { code, .. } => *code,
+            Error::Server { code, .. } | Error::Tonic { code, .. } => *code,
            Error::FlightGet { source, .. }
            | Error::RegionServer { source, .. }
            | Error::FlowServer { source, .. } => source.status_code(),
@@ -153,34 +162,7 @@ impl ErrorExt for Error {
    }
 }

-impl From<Status> for Error {
-    fn from(e: Status) -> Self {
-        fn get_metadata_value(e: &Status, key: &str) -> Option<String> {
-            e.metadata()
-                .get(key)
-                .and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
-        }
-
-        let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE).and_then(|s| {
-            if let Ok(code) = s.parse::<u32>() {
-                StatusCode::from_u32(code)
-            } else {
-                None
-            }
-        });
-        let tonic_code = e.code();
-        let code = code.unwrap_or_else(|| convert_tonic_code_to_status_code(tonic_code));
-
-        let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG)
-            .unwrap_or_else(|| e.message().to_string());
-
-        Self::Server {
-            code,
-            msg,
-            location: location!(),
-        }
-    }
-}
+define_from_tonic_status!(Error, Tonic);

 impl Error {
    pub fn should_retry(&self) -> bool {
--- a/src/client/src/flow.rs
+++ b/src/client/src/flow.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use api::v1::flow::{FlowRequest, FlowResponse};
+use api::v1::flow::{DirtyWindowRequest, DirtyWindowRequests, FlowRequest, FlowResponse};
 use api::v1::region::InsertRequests;
 use common_error::ext::BoxedError;
 use common_meta::node_manager::Flownode;
@@ -44,6 +44,16 @@ impl Flownode for FlowRequester {
            .map_err(BoxedError::new)
            .context(common_meta::error::ExternalSnafu)
    }
+
+    /// Forwards the request to the inherent `FlowRequester::handle_mark_window_dirty`
+    /// and wraps any failure into `common_meta`'s `External` error.
+    async fn handle_mark_window_dirty(
+        &self,
+        req: DirtyWindowRequest,
+    ) -> common_meta::error::Result<FlowResponse> {
+        // The path resolves to the inherent method of the same name (inherent items
+        // take priority over trait items), so this does not recurse.
+        let result = FlowRequester::handle_mark_window_dirty(self, req).await;
+        result
+            .map_err(BoxedError::new)
+            .context(common_meta::error::ExternalSnafu)
+    }
 }

 impl FlowRequester {
@@ -91,4 +101,20 @@ impl FlowRequester {
            .into_inner();
        Ok(response)
    }
+
+    /// Sends a single dirty-window request to the flow server and returns its response.
+    ///
+    /// A tonic failure is converted into this crate's `Error` and wrapped in a
+    /// `FlowServer` error carrying the server address and the tonic status code.
+    async fn handle_mark_window_dirty(&self, req: DirtyWindowRequest) -> Result<FlowResponse> {
+        let (addr, mut client) = self.client.raw_flow_client()?;
+        let requests = DirtyWindowRequests {
+            requests: vec![req],
+        };
+
+        match client.handle_mark_dirty_time_window(requests).await {
+            Ok(response) => Ok(response.into_inner()),
+            Err(status) => {
+                let code = status.code();
+                let source: crate::error::Error = status.into();
+                Err(BoxedError::new(source)).context(FlowServerSnafu { addr, code })
+            }
+        }
+    }
 }
--- a/src/client/src/region.rs
+++ b/src/client/src/region.rs
@@ -21,7 +21,7 @@ use arc_swap::ArcSwapOption;
 use arrow_flight::Ticket;
 use async_stream::stream;
 use async_trait::async_trait;
-use common_error::ext::{BoxedError, ErrorExt};
+use common_error::ext::BoxedError;
 use common_error::status_code::StatusCode;
 use common_grpc::flight::{FlightDecoder, FlightMessage};
 use common_meta::error::{self as meta_error, Result as MetaResult};
@@ -107,24 +107,18 @@ impl RegionRequester {
            .mut_inner()
            .do_get(ticket)
            .await
-            .map_err(|e| {
+            .or_else(|e| {
                let tonic_code = e.code();
                let e: error::Error = e.into();
-                let code = e.status_code();
-                let msg = e.to_string();
-                let error = ServerSnafu { code, msg }
-                    .fail::<()>()
-                    .map_err(BoxedError::new)
-                    .with_context(|_| FlightGetSnafu {
-                        tonic_code,
-                        addr: flight_client.addr().to_string(),
-                    })
-                    .unwrap_err();
                error!(
                    e; "Failed to do Flight get, addr: {}, code: {}",
                    flight_client.addr(),
                    tonic_code
                );
+                let error = Err(BoxedError::new(e)).with_context(|_| FlightGetSnafu {
+                    addr: flight_client.addr().to_string(),
+                    tonic_code,
+                });
                error
            })?;

@@ -134,7 +128,10 @@ impl RegionRequester {
        let mut flight_message_stream = flight_data_stream.map(move |flight_data| {
            flight_data
                .map_err(Error::from)
-                .and_then(|data| decoder.try_decode(&data).context(ConvertFlightDataSnafu))
+                .and_then(|data| decoder.try_decode(&data).context(ConvertFlightDataSnafu))?
+                .context(IllegalFlightMessagesSnafu {
+                    reason: "none message",
+                })
        });

        let Some(first_flight_message) = flight_message_stream.next().await else {
@@ -163,19 +160,70 @@ impl RegionRequester {
            let _span = tracing_context.attach(common_telemetry::tracing::info_span!(
                "poll_flight_data_stream"
            ));
-            while let Some(flight_message) = flight_message_stream.next().await {
-                let flight_message = flight_message
-                    .map_err(BoxedError::new)
-                    .context(ExternalSnafu)?;
+
+            let mut buffered_message: Option<FlightMessage> = None;
+            let mut stream_ended = false;
+
+            while !stream_ended {
+                // get the next message from the buffered message or read from the flight message stream
+                let flight_message_item = if let Some(msg) = buffered_message.take() {
+                    Some(Ok(msg))
+                } else {
+                    flight_message_stream.next().await
+                };
+
+                let flight_message = match flight_message_item {
+                    Some(Ok(message)) => message,
+                    Some(Err(e)) => {
+                        yield Err(BoxedError::new(e)).context(ExternalSnafu);
+                        break;
+                    }
+                    None => break,
+                };

                match flight_message {
                    FlightMessage::RecordBatch(record_batch) => {
-                        yield RecordBatch::try_from_df_record_batch(
+                        let result_to_yield = RecordBatch::try_from_df_record_batch(
                            schema_cloned.clone(),
                            record_batch,
-                        )
+                        );
+
+                        // get the next message from the stream. normally it should be a metrics message.
+                        if let Some(next_flight_message_result) = flight_message_stream.next().await
+                        {
+                            match next_flight_message_result {
+                                Ok(FlightMessage::Metrics(s)) => {
+                                    let m = serde_json::from_str(&s).ok().map(Arc::new);
+                                    metrics_ref.swap(m);
+                                }
+                                Ok(FlightMessage::RecordBatch(rb)) => {
+                                    // for some reason it's not a metrics message, so we need to buffer this record batch
+                                    // and yield it in the next iteration.
+                                    buffered_message = Some(FlightMessage::RecordBatch(rb));
+                                }
+                                Ok(_) => {
+                                    yield IllegalFlightMessagesSnafu {
+                                        reason: "A RecordBatch message can only be succeeded by a Metrics message or another RecordBatch message"
+                                    }
+                                    .fail()
+                                    .map_err(BoxedError::new)
+                                    .context(ExternalSnafu);
+                                    break;
+                                }
+                                Err(e) => {
+                                    yield Err(BoxedError::new(e)).context(ExternalSnafu);
+                                    break;
+                                }
+                            }
+                        } else {
+                            // the stream has ended
+                            stream_ended = true;
+                        }
+
+                        yield result_to_yield;
                    }
                    FlightMessage::Metrics(s) => {
+                        // just a branch in case of some metrics message comes after other things.
                        let m = serde_json::from_str(&s).ok().map(Arc::new);
                        metrics_ref.swap(m);
                        break;
--- a/src/cmd/Cargo.toml
+++ b/src/cmd/Cargo.toml
@@ -16,7 +16,7 @@ default = [
    "meta-srv/pg_kvbackend",
    "meta-srv/mysql_kvbackend",
 ]
-enterprise = ["common-meta/enterprise", "frontend/enterprise", "meta-srv/enterprise"]
+enterprise = ["common-meta/enterprise", "frontend/enterprise", "meta-srv/enterprise", "catalog/enterprise"]
 tokio-console = ["common-telemetry/tokio-console"]

 [lints]
@@ -52,7 +52,6 @@ common-version.workspace = true
 common-wal.workspace = true
 datanode.workspace = true
 datatypes.workspace = true
-either = "1.8"
 etcd-client.workspace = true
 file-engine.workspace = true
 flow.workspace = true
@@ -67,6 +66,7 @@ metric-engine.workspace = true
 mito2.workspace = true
 moka.workspace = true
 nu-ansi-term = "0.46"
+object-store.workspace = true
 plugins.workspace = true
 prometheus.workspace = true
 prost.workspace = true
--- a/src/cmd/src/bin/greptime.rs
+++ b/src/cmd/src/bin/greptime.rs
@@ -20,11 +20,11 @@ use cmd::error::{InitTlsProviderSnafu, Result};
 use cmd::options::GlobalOptions;
 use cmd::{cli, datanode, flownode, frontend, metasrv, standalone, App};
 use common_base::Plugins;
-use common_version::version;
+use common_version::{verbose_version, version};
 use servers::install_ring_crypto_provider;

 #[derive(Parser)]
-#[command(name = "greptime", author, version, long_version = version(), about)]
+#[command(name = "greptime", author, version, long_version = verbose_version(), about)]
 #[command(propagate_version = true)]
 pub(crate) struct Command {
    #[clap(subcommand)]
@@ -143,10 +143,8 @@ async fn start(cli: Command) -> Result<()> {
 }

 fn setup_human_panic() {
-    human_panic::setup_panic!(
-        human_panic::Metadata::new("GreptimeDB", env!("CARGO_PKG_VERSION"))
-            .homepage("https://github.com/GreptimeTeam/greptimedb/discussions")
-    );
+    human_panic::setup_panic!(human_panic::Metadata::new("GreptimeDB", version())
+        .homepage("https://github.com/GreptimeTeam/greptimedb/discussions"));

    common_telemetry::set_panic_hook();
 }
--- a/src/cmd/src/datanode.rs
+++ b/src/cmd/src/datanode.rs
@@ -280,7 +280,7 @@ mod tests {

    use common_config::ENV_VAR_SEP;
    use common_test_util::temp_dir::create_named_temp_file;
-    use datanode::config::{FileConfig, GcsConfig, ObjectStoreConfig, S3Config};
+    use object_store::config::{FileConfig, GcsConfig, ObjectStoreConfig, S3Config};
    use servers::heartbeat_options::HeartbeatOptions;

    use super::*;
--- a/src/cmd/src/datanode/builder.rs
+++ b/src/cmd/src/datanode/builder.rs
@@ -19,7 +19,7 @@ use catalog::kvbackend::MetaKvBackend;
 use common_base::Plugins;
 use common_meta::cache::LayeredCacheRegistryBuilder;
 use common_telemetry::info;
-use common_version::{short_version, version};
+use common_version::{short_version, verbose_version};
 use datanode::datanode::DatanodeBuilder;
 use datanode::service::DatanodeServiceBuilder;
 use meta_client::MetaClientType;
@@ -67,7 +67,7 @@ impl InstanceBuilder {
            None,
        );

-        log_versions(version(), short_version(), APP_NAME);
+        log_versions(verbose_version(), short_version(), APP_NAME);
        create_resource_limit_metrics(APP_NAME);

        plugins::setup_datanode_plugins(plugins, &opts.plugins, dn_opts)
@@ -93,6 +93,7 @@ impl InstanceBuilder {
            MetaClientType::Datanode { member_id },
            meta_client_options,
            Some(&plugins),
+            None,
        )
        .await
        .context(MetaClientInitSnafu)?;
--- a/src/cmd/src/flownode.rs
+++ b/src/cmd/src/flownode.rs
@@ -18,7 +18,7 @@ use std::time::Duration;

 use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
 use catalog::information_extension::DistributedInformationExtension;
-use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend};
+use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManagerBuilder, MetaKvBackend};
 use clap::Parser;
 use client::client_manager::NodeClients;
 use common_base::Plugins;
@@ -32,7 +32,7 @@ use common_meta::key::flow::FlowMetadataManager;
 use common_meta::key::TableMetadataManager;
 use common_telemetry::info;
 use common_telemetry::logging::{TracingOptions, DEFAULT_LOGGING_DIR};
-use common_version::{short_version, version};
+use common_version::{short_version, verbose_version};
 use flow::{
    get_flow_auth_options, FlownodeBuilder, FlownodeInstance, FlownodeServiceBuilder,
    FrontendClient, FrontendInvoker,
@@ -55,14 +55,32 @@ type FlownodeOptions = GreptimeOptions<flow::FlownodeOptions>;
 pub struct Instance {
    flownode: FlownodeInstance,

+    // The components of the flownode, kept here so that it is easier to
+    // extend the flownode on top of them.
+    #[cfg(feature = "enterprise")]
+    components: Components,
+
    // Keep the logging guard to prevent the worker from being dropped.
    _guard: Vec<WorkerGuard>,
 }

+#[cfg(feature = "enterprise")]
+pub struct Components {
+    pub catalog_manager: catalog::CatalogManagerRef,
+    pub fe_client: Arc<FrontendClient>,
+    pub kv_backend: common_meta::kv_backend::KvBackendRef,
+}
+
 impl Instance {
-    pub fn new(flownode: FlownodeInstance, guard: Vec<WorkerGuard>) -> Self {
+    pub fn new(
+        flownode: FlownodeInstance,
+        #[cfg(feature = "enterprise")] components: Components,
+        guard: Vec<WorkerGuard>,
+    ) -> Self {
        Self {
            flownode,
+            #[cfg(feature = "enterprise")]
+            components,
            _guard: guard,
        }
    }
@@ -75,6 +93,11 @@ impl Instance {
    pub fn flownode_mut(&mut self) -> &mut FlownodeInstance {
        &mut self.flownode
    }
+
+    #[cfg(feature = "enterprise")]
+    pub fn components(&self) -> &Components {
+        &self.components
+    }
 }

 #[async_trait::async_trait]
@@ -256,7 +279,7 @@ impl StartCommand {
            None,
        );

-        log_versions(version(), short_version(), APP_NAME);
+        log_versions(verbose_version(), short_version(), APP_NAME);
        create_resource_limit_metrics(APP_NAME);

        info!("Flownode start command: {:#?}", self);
@@ -283,6 +306,7 @@ impl StartCommand {
            MetaClientType::Flownode { member_id },
            meta_config,
            None,
+            None,
        )
        .await
        .context(MetaClientInitSnafu)?;
@@ -318,13 +342,12 @@ impl StartCommand {

        let information_extension =
            Arc::new(DistributedInformationExtension::new(meta_client.clone()));
-        let catalog_manager = KvBackendCatalogManager::new(
+        let catalog_manager = KvBackendCatalogManagerBuilder::new(
            information_extension,
            cached_meta_backend.clone(),
            layered_cache_registry.clone(),
-            None,
-            None,
-        );
+        )
+        .build();

        let table_metadata_manager =
            Arc::new(TableMetadataManager::new(cached_meta_backend.clone()));
@@ -347,21 +370,26 @@ impl StartCommand {

        let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
        let flow_auth_header = get_flow_auth_options(&opts).context(StartFlownodeSnafu)?;
-        let frontend_client =
-            FrontendClient::from_meta_client(meta_client.clone(), flow_auth_header);
+        let frontend_client = FrontendClient::from_meta_client(
+            meta_client.clone(),
+            flow_auth_header,
+            opts.query.clone(),
+            opts.flow.batching_mode.clone(),
+        );
+        let frontend_client = Arc::new(frontend_client);
        let flownode_builder = FlownodeBuilder::new(
            opts.clone(),
            plugins,
            table_metadata_manager,
            catalog_manager.clone(),
            flow_metadata_manager,
-            Arc::new(frontend_client),
+            frontend_client.clone(),
        )
        .with_heartbeat_task(heartbeat_task);

        let mut flownode = flownode_builder.build().await.context(StartFlownodeSnafu)?;
        let services = FlownodeServiceBuilder::new(&opts)
-            .with_grpc_server(flownode.flownode_server().clone())
+            .with_default_grpc_server(flownode.flownode_server())
            .enable_http_service()
            .build()
            .context(StartFlownodeSnafu)?;
@@ -393,6 +421,16 @@ impl StartCommand {
            .set_frontend_invoker(invoker)
            .await;

-        Ok(Instance::new(flownode, guard))
+        #[cfg(feature = "enterprise")]
+        let components = Components {
+            catalog_manager: catalog_manager.clone(),
+            fe_client: frontend_client,
+            kv_backend: cached_meta_backend,
+        };
+
+        #[cfg(not(feature = "enterprise"))]
+        return Ok(Instance::new(flownode, guard));
+        #[cfg(feature = "enterprise")]
+        Ok(Instance::new(flownode, components, guard))
    }
 }
--- a/src/cmd/src/frontend.rs
+++ b/src/cmd/src/frontend.rs
@@ -19,7 +19,7 @@ use std::time::Duration;
 use async_trait::async_trait;
 use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
 use catalog::information_extension::DistributedInformationExtension;
-use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend};
+use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManagerBuilder, MetaKvBackend};
 use catalog::process_manager::ProcessManager;
 use clap::Parser;
 use client::client_manager::NodeClients;
@@ -33,7 +33,7 @@ use common_meta::heartbeat::handler::HandlerGroupExecutor;
 use common_telemetry::info;
 use common_telemetry::logging::{TracingOptions, DEFAULT_LOGGING_DIR};
 use common_time::timezone::set_default_timezone;
-use common_version::{short_version, version};
+use common_version::{short_version, verbose_version};
 use frontend::frontend::Frontend;
 use frontend::heartbeat::HeartbeatTask;
 use frontend::instance::builder::FrontendBuilder;
@@ -102,7 +102,7 @@ impl App for Instance {
 #[derive(Parser)]
 pub struct Command {
    #[clap(subcommand)]
-    subcmd: SubCommand,
+    pub subcmd: SubCommand,
 }

 impl Command {
@@ -116,7 +116,7 @@ impl Command {
 }

 #[derive(Parser)]
-enum SubCommand {
+pub enum SubCommand {
    Start(StartCommand),
 }

@@ -153,7 +153,7 @@ pub struct StartCommand {
    #[clap(long)]
    postgres_addr: Option<String>,
    #[clap(short, long)]
-    config_file: Option<String>,
+    pub config_file: Option<String>,
    #[clap(short, long)]
    influxdb_enable: Option<bool>,
    #[clap(long, value_delimiter = ',', num_args = 1..)]
@@ -169,7 +169,7 @@ pub struct StartCommand {
    #[clap(long)]
    disable_dashboard: Option<bool>,
    #[clap(long, default_value = "GREPTIMEDB_FRONTEND")]
-    env_prefix: String,
+    pub env_prefix: String,
 }

 impl StartCommand {
@@ -282,7 +282,7 @@ impl StartCommand {
            opts.component.slow_query.as_ref(),
        );

-        log_versions(version(), short_version(), APP_NAME);
+        log_versions(verbose_version(), short_version(), APP_NAME);
        create_resource_limit_metrics(APP_NAME);

        info!("Frontend start command: {:#?}", self);
@@ -313,6 +313,7 @@ impl StartCommand {
            MetaClientType::Frontend,
            meta_client_options,
            Some(&plugins),
+            None,
        )
        .await
        .context(error::MetaClientInitSnafu)?;
@@ -349,13 +350,20 @@ impl StartCommand {
            addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
            Some(meta_client.clone()),
        ));
-        let catalog_manager = KvBackendCatalogManager::new(
+
+        let builder = KvBackendCatalogManagerBuilder::new(
            information_extension,
            cached_meta_backend.clone(),
            layered_cache_registry.clone(),
-            None,
-            Some(process_manager.clone()),
-        );
+        )
+        .with_process_manager(process_manager.clone());
+        #[cfg(feature = "enterprise")]
+        let builder = if let Some(factories) = plugins.get() {
+            builder.with_extra_information_table_factories(factories)
+        } else {
+            builder
+        };
+        let catalog_manager = builder.build();

        let executor = HandlerGroupExecutor::new(vec![
            Arc::new(ParseMailboxMessageHandler),
--- a/src/cmd/src/lib.rs
+++ b/src/cmd/src/lib.rs
@@ -112,7 +112,7 @@ pub trait App: Send {
 pub fn log_versions(version: &str, short_version: &str, app: &str) {
    // Report app version as gauge.
    APP_VERSION
-        .with_label_values(&[env!("CARGO_PKG_VERSION"), short_version, app])
+        .with_label_values(&[common_version::version(), short_version, app])
        .inc();

    // Log version and argument flags.
--- a/src/cmd/src/metasrv.rs
+++ b/src/cmd/src/metasrv.rs
@@ -22,7 +22,7 @@ use common_base::Plugins;
 use common_config::Configurable;
 use common_telemetry::info;
 use common_telemetry::logging::{TracingOptions, DEFAULT_LOGGING_DIR};
-use common_version::{short_version, version};
+use common_version::{short_version, verbose_version};
 use meta_srv::bootstrap::MetasrvInstance;
 use meta_srv::metasrv::BackendImpl;
 use snafu::ResultExt;
@@ -54,6 +54,10 @@ impl Instance {
    pub fn get_inner(&self) -> &MetasrvInstance {
        &self.instance
    }
+
+    pub fn mut_inner(&mut self) -> &mut MetasrvInstance {
+        &mut self.instance
+    }
 }

 #[async_trait]
@@ -320,7 +324,7 @@ impl StartCommand {
            None,
        );

-        log_versions(version(), short_version(), APP_NAME);
+        log_versions(verbose_version(), short_version(), APP_NAME);
        create_resource_limit_metrics(APP_NAME);

        info!("Metasrv start command: {:#?}", self);
@@ -336,12 +340,12 @@ impl StartCommand {
            .await
            .context(StartMetaServerSnafu)?;

-        let builder = meta_srv::bootstrap::metasrv_builder(&opts, plugins.clone(), None)
+        let builder = meta_srv::bootstrap::metasrv_builder(&opts, plugins, None)
            .await
            .context(error::BuildMetaServerSnafu)?;
        let metasrv = builder.build().await.context(error::BuildMetaServerSnafu)?;

-        let instance = MetasrvInstance::new(opts, plugins, metasrv)
+        let instance = MetasrvInstance::new(metasrv)
            .await
            .context(error::BuildMetaServerSnafu)?;

--- a/src/cmd/src/standalone.rs
+++ b/src/cmd/src/standalone.rs
@@ -20,7 +20,7 @@ use std::{fs, path};
 use async_trait::async_trait;
 use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
 use catalog::information_schema::InformationExtension;
-use catalog::kvbackend::KvBackendCatalogManager;
+use catalog::kvbackend::KvBackendCatalogManagerBuilder;
 use catalog::process_manager::ProcessManager;
 use clap::Parser;
 use client::api::v1::meta::RegionRole;
@@ -30,20 +30,16 @@ use common_catalog::consts::{MIN_USER_FLOW_ID, MIN_USER_TABLE_ID};
 use common_config::{metadata_store_dir, Configurable, KvBackendConfig};
 use common_error::ext::BoxedError;
 use common_meta::cache::LayeredCacheRegistryBuilder;
-use common_meta::cache_invalidator::CacheInvalidatorRef;
 use common_meta::cluster::{NodeInfo, NodeStatus};
 use common_meta::datanode::RegionStat;
-use common_meta::ddl::flow_meta::{FlowMetadataAllocator, FlowMetadataAllocatorRef};
-use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef};
+use common_meta::ddl::flow_meta::FlowMetadataAllocator;
+use common_meta::ddl::table_meta::TableMetadataAllocator;
 use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl, ProcedureExecutorRef};
 use common_meta::ddl_manager::DdlManager;
-#[cfg(feature = "enterprise")]
-use common_meta::ddl_manager::TriggerDdlManagerRef;
 use common_meta::key::flow::flow_state::FlowStat;
-use common_meta::key::flow::{FlowMetadataManager, FlowMetadataManagerRef};
+use common_meta::key::flow::FlowMetadataManager;
 use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
 use common_meta::kv_backend::KvBackendRef;
-use common_meta::node_manager::NodeManagerRef;
 use common_meta::peer::Peer;
 use common_meta::region_keeper::MemoryRegionKeeper;
 use common_meta::region_registry::LeaderRegionRegistry;
@@ -55,7 +51,7 @@ use common_telemetry::logging::{
    LoggingOptions, SlowQueryOptions, TracingOptions, DEFAULT_LOGGING_DIR,
 };
 use common_time::timezone::set_default_timezone;
-use common_version::{short_version, version};
+use common_version::{short_version, verbose_version};
 use common_wal::config::DatanodeWalConfig;
 use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, StorageConfig};
 use datanode::datanode::{Datanode, DatanodeBuilder};
@@ -261,15 +257,34 @@ pub struct Instance {
    flownode: FlownodeInstance,
    procedure_manager: ProcedureManagerRef,
    wal_options_allocator: WalOptionsAllocatorRef,
+
+    // The components of the standalone instance, kept here so that it is
+    // easier to extend the standalone instance on top of them.
+    #[cfg(feature = "enterprise")]
+    components: Components,
+
    // Keep the logging guard to prevent the worker from being dropped.
    _guard: Vec<WorkerGuard>,
 }

+#[cfg(feature = "enterprise")]
+pub struct Components {
+    pub plugins: Plugins,
+    pub kv_backend: KvBackendRef,
+    pub frontend_client: Arc<FrontendClient>,
+    pub catalog_manager: catalog::CatalogManagerRef,
+}
+
 impl Instance {
    /// Find the socket addr of a server by its `name`.
    pub fn server_addr(&self, name: &str) -> Option<SocketAddr> {
        self.frontend.server_handlers().addr(name)
    }
+
+    #[cfg(feature = "enterprise")]
+    pub fn components(&self) -> &Components {
+        &self.components
+    }
 }

 #[async_trait]
@@ -470,7 +485,7 @@ impl StartCommand {
            opts.component.slow_query.as_ref(),
        );

-        log_versions(version(), short_version(), APP_NAME);
+        log_versions(verbose_version(), short_version(), APP_NAME);
        create_resource_limit_metrics(APP_NAME);

        info!("Standalone start command: {:#?}", self);
@@ -529,13 +544,20 @@ impl StartCommand {
        ));

        let process_manager = Arc::new(ProcessManager::new(opts.grpc.server_addr.clone(), None));
-        let catalog_manager = KvBackendCatalogManager::new(
+        let builder = KvBackendCatalogManagerBuilder::new(
            information_extension.clone(),
            kv_backend.clone(),
            layered_cache_registry.clone(),
-            Some(procedure_manager.clone()),
-            Some(process_manager.clone()),
-        );
+        )
+        .with_procedure_manager(procedure_manager.clone())
+        .with_process_manager(process_manager.clone());
+        #[cfg(feature = "enterprise")]
+        let builder = if let Some(factories) = plugins.get() {
+            builder.with_extra_information_table_factories(factories)
+        } else {
+            builder
+        };
+        let catalog_manager = builder.build();

        let table_metadata_manager =
            Self::create_table_metadata_manager(kv_backend.clone()).await?;
@@ -549,14 +571,15 @@ impl StartCommand {
        // for standalone not use grpc, but get a handler to frontend grpc client without
        // actually make a connection
        let (frontend_client, frontend_instance_handler) =
-            FrontendClient::from_empty_grpc_handler();
+            FrontendClient::from_empty_grpc_handler(opts.query.clone());
+        let frontend_client = Arc::new(frontend_client);
        let flow_builder = FlownodeBuilder::new(
            flownode_options,
            plugins.clone(),
            table_metadata_manager.clone(),
            catalog_manager.clone(),
            flow_metadata_manager.clone(),
-            Arc::new(frontend_client.clone()),
+            frontend_client.clone(),
        );
        let flownode = flow_builder
            .build()
@@ -594,28 +617,36 @@ impl StartCommand {
            .await
            .context(error::BuildWalOptionsAllocatorSnafu)?;
        let wal_options_allocator = Arc::new(wal_options_allocator);
-        let table_meta_allocator = Arc::new(TableMetadataAllocator::new(
+        let table_metadata_allocator = Arc::new(TableMetadataAllocator::new(
            table_id_sequence,
            wal_options_allocator.clone(),
        ));
-        let flow_meta_allocator = Arc::new(FlowMetadataAllocator::with_noop_peer_allocator(
+        let flow_metadata_allocator = Arc::new(FlowMetadataAllocator::with_noop_peer_allocator(
            flow_id_sequence,
        ));

+        let ddl_context = DdlContext {
+            node_manager: node_manager.clone(),
+            cache_invalidator: layered_cache_registry.clone(),
+            memory_region_keeper: Arc::new(MemoryRegionKeeper::default()),
+            leader_region_registry: Arc::new(LeaderRegionRegistry::default()),
+            table_metadata_manager: table_metadata_manager.clone(),
+            table_metadata_allocator: table_metadata_allocator.clone(),
+            flow_metadata_manager: flow_metadata_manager.clone(),
+            flow_metadata_allocator: flow_metadata_allocator.clone(),
+            region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
+        };
+        let procedure_manager_c = procedure_manager.clone();
+
+        let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager_c, true)
+            .context(error::InitDdlManagerSnafu)?;
        #[cfg(feature = "enterprise")]
-        let trigger_ddl_manager: Option<TriggerDdlManagerRef> = plugins.get();
-        let ddl_task_executor = Self::create_ddl_task_executor(
-            procedure_manager.clone(),
-            node_manager.clone(),
-            layered_cache_registry.clone(),
-            table_metadata_manager,
-            table_meta_allocator,
-            flow_metadata_manager,
-            flow_meta_allocator,
-            #[cfg(feature = "enterprise")]
-            trigger_ddl_manager,
-        )
-        .await?;
+        let ddl_manager = {
+            let trigger_ddl_manager: Option<common_meta::ddl_manager::TriggerDdlManagerRef> =
+                plugins.get();
+            ddl_manager.with_trigger_ddl_manager(trigger_ddl_manager)
+        };
+        let ddl_task_executor: ProcedureExecutorRef = Arc::new(ddl_manager);

        let fe_instance = FrontendBuilder::new(
            fe_opts.clone(),
@@ -658,7 +689,7 @@ impl StartCommand {
        let export_metrics_task = ExportMetricsTask::try_new(&opts.export_metrics, Some(&plugins))
            .context(error::ServersSnafu)?;

-        let servers = Services::new(opts, fe_instance.clone(), plugins)
+        let servers = Services::new(opts, fe_instance.clone(), plugins.clone())
            .build()
            .context(error::StartFrontendSnafu)?;

@@ -669,51 +700,26 @@ impl StartCommand {
            export_metrics_task,
        };

+        #[cfg(feature = "enterprise")]
+        let components = Components {
+            plugins,
+            kv_backend,
+            frontend_client,
+            catalog_manager,
+        };
+
        Ok(Instance {
            datanode,
            frontend,
            flownode,
            procedure_manager,
            wal_options_allocator,
+            #[cfg(feature = "enterprise")]
+            components,
            _guard: guard,
        })
    }

-    #[allow(clippy::too_many_arguments)]
-    pub async fn create_ddl_task_executor(
-        procedure_manager: ProcedureManagerRef,
-        node_manager: NodeManagerRef,
-        cache_invalidator: CacheInvalidatorRef,
-        table_metadata_manager: TableMetadataManagerRef,
-        table_metadata_allocator: TableMetadataAllocatorRef,
-        flow_metadata_manager: FlowMetadataManagerRef,
-        flow_metadata_allocator: FlowMetadataAllocatorRef,
-        #[cfg(feature = "enterprise")] trigger_ddl_manager: Option<TriggerDdlManagerRef>,
-    ) -> Result<ProcedureExecutorRef> {
-        let procedure_executor: ProcedureExecutorRef = Arc::new(
-            DdlManager::try_new(
-                DdlContext {
-                    node_manager,
-                    cache_invalidator,
-                    memory_region_keeper: Arc::new(MemoryRegionKeeper::default()),
-                    leader_region_registry: Arc::new(LeaderRegionRegistry::default()),
-                    table_metadata_manager,
-                    table_metadata_allocator,
-                    flow_metadata_manager,
-                    flow_metadata_allocator,
-                    region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
-                },
-                procedure_manager,
-                true,
-                #[cfg(feature = "enterprise")]
-                trigger_ddl_manager,
-            )
-            .context(error::InitDdlManagerSnafu)?,
-        );
-
-        Ok(procedure_executor)
-    }
-
    pub async fn create_table_metadata_manager(
        kv_backend: KvBackendRef,
    ) -> Result<TableMetadataManagerRef> {
@@ -815,6 +821,7 @@ impl InformationExtension for StandaloneInformationExtension {
                    memtable_size: region_stat.memtable_size,
                    manifest_size: region_stat.manifest_size,
                    sst_size: region_stat.sst_size,
+                    sst_num: region_stat.sst_num,
                    index_size: region_stat.index_size,
                    region_manifest: region_stat.manifest.into(),
                    data_topic_latest_entry_id: region_stat.data_topic_latest_entry_id,
@@ -849,7 +856,7 @@ mod tests {
    use common_config::ENV_VAR_SEP;
    use common_test_util::temp_dir::create_named_temp_file;
    use common_wal::config::DatanodeWalConfig;
-    use datanode::config::{FileConfig, GcsConfig};
+    use object_store::config::{FileConfig, GcsConfig};

    use super::*;
    use crate::options::GlobalOptions;
@@ -968,15 +975,15 @@ mod tests {

        assert!(matches!(
            &dn_opts.storage.store,
-            datanode::config::ObjectStoreConfig::File(FileConfig { .. })
+            object_store::config::ObjectStoreConfig::File(FileConfig { .. })
        ));
        assert_eq!(dn_opts.storage.providers.len(), 2);
        assert!(matches!(
            dn_opts.storage.providers[0],
-            datanode::config::ObjectStoreConfig::Gcs(GcsConfig { .. })
+            object_store::config::ObjectStoreConfig::Gcs(GcsConfig { .. })
        ));
        match &dn_opts.storage.providers[1] {
-            datanode::config::ObjectStoreConfig::S3(s3_config) => {
+            object_store::config::ObjectStoreConfig::S3(s3_config) => {
                assert_eq!(
                    "SecretBox<alloc::string::String>([REDACTED])".to_string(),
                    format!("{:?}", s3_config.access_key_id)
--- a/src/cmd/tests/load_config_test.rs
+++ b/src/cmd/tests/load_config_test.rs
@@ -18,17 +18,19 @@ use cmd::options::GreptimeOptions;
 use cmd::standalone::StandaloneOptions;
 use common_config::{Configurable, DEFAULT_DATA_HOME};
 use common_options::datanode::{ClientOptions, DatanodeClientOptions};
-use common_telemetry::logging::{LoggingOptions, DEFAULT_LOGGING_DIR, DEFAULT_OTLP_ENDPOINT};
+use common_telemetry::logging::{LoggingOptions, DEFAULT_LOGGING_DIR, DEFAULT_OTLP_HTTP_ENDPOINT};
 use common_wal::config::raft_engine::RaftEngineConfig;
 use common_wal::config::DatanodeWalConfig;
 use datanode::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
 use file_engine::config::EngineConfig as FileEngineConfig;
+use flow::FlownodeOptions;
 use frontend::frontend::FrontendOptions;
 use meta_client::MetaClientOptions;
 use meta_srv::metasrv::MetasrvOptions;
 use meta_srv::selector::SelectorType;
 use metric_engine::config::EngineConfig as MetricEngineConfig;
 use mito2::config::MitoConfig;
+use query::options::QueryOptions;
 use servers::export_metrics::ExportMetricsOption;
 use servers::grpc::GrpcOptions;
 use servers::http::HttpOptions;
@@ -81,7 +83,7 @@ fn test_load_datanode_example_config() {
            logging: LoggingOptions {
                level: Some("info".to_string()),
                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
-                otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
+                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
                tracing_sample_ratio: Some(Default::default()),
                ..Default::default()
            },
@@ -124,7 +126,7 @@ fn test_load_frontend_example_config() {
            logging: LoggingOptions {
                level: Some("info".to_string()),
                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
-                otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
+                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
                tracing_sample_ratio: Some(Default::default()),
                ..Default::default()
            },
@@ -172,7 +174,7 @@ fn test_load_metasrv_example_config() {
            logging: LoggingOptions {
                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
                level: Some("info".to_string()),
-                otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
+                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
                tracing_sample_ratio: Some(Default::default()),
                ..Default::default()
            },
@@ -195,6 +197,57 @@ fn test_load_metasrv_example_config() {
    similar_asserts::assert_eq!(options, expected);
 }

+#[test]
+fn test_load_flownode_example_config() {
+    let example_config = common_test_util::find_workspace_path("config/flownode.example.toml");
+    let options =
+        GreptimeOptions::<FlownodeOptions>::load_layered_options(example_config.to_str(), "")
+            .unwrap();
+    let expected = GreptimeOptions::<FlownodeOptions> {
+        component: FlownodeOptions {
+            node_id: Some(14),
+            flow: Default::default(),
+            grpc: GrpcOptions {
+                bind_addr: "127.0.0.1:6800".to_string(),
+                server_addr: "127.0.0.1:6800".to_string(),
+                runtime_size: 2,
+                ..Default::default()
+            },
+            logging: LoggingOptions {
+                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
+                level: Some("info".to_string()),
+                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
+                otlp_export_protocol: Some(common_telemetry::logging::OtlpExportProtocol::Http),
+                tracing_sample_ratio: Some(Default::default()),
+                ..Default::default()
+            },
+            tracing: Default::default(),
+            heartbeat: Default::default(),
+            // The flownode deliberately uses a lower query parallelism
+            // to avoid overwhelming the frontend with too many queries.
+            query: QueryOptions { parallelism: 1 },
+            meta_client: Some(MetaClientOptions {
+                metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
+                timeout: Duration::from_secs(3),
+                heartbeat_timeout: Duration::from_millis(500),
+                ddl_timeout: Duration::from_secs(10),
+                connect_timeout: Duration::from_secs(1),
+                tcp_nodelay: true,
+                metadata_cache_max_capacity: 100000,
+                metadata_cache_ttl: Duration::from_secs(600),
+                metadata_cache_tti: Duration::from_secs(300),
+            }),
+            http: HttpOptions {
+                addr: "127.0.0.1:4000".to_string(),
+                ..Default::default()
+            },
+            user_provider: None,
+        },
+        ..Default::default()
+    };
+    similar_asserts::assert_eq!(options, expected);
+}
+
 #[test]
 fn test_load_standalone_example_config() {
    let example_config = common_test_util::find_workspace_path("config/standalone.example.toml");
@@ -229,7 +282,7 @@ fn test_load_standalone_example_config() {
            logging: LoggingOptions {
                level: Some("info".to_string()),
                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
-                otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
+                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
                tracing_sample_ratio: Some(Default::default()),
                ..Default::default()
            },
--- a/src/common/catalog/src/consts.rs
+++ b/src/common/catalog/src/consts.rs
@@ -78,7 +78,7 @@ pub const INFORMATION_SCHEMA_ROUTINES_TABLE_ID: u32 = 21;
 pub const INFORMATION_SCHEMA_SCHEMA_PRIVILEGES_TABLE_ID: u32 = 22;
 /// id for information_schema.TABLE_PRIVILEGES
 pub const INFORMATION_SCHEMA_TABLE_PRIVILEGES_TABLE_ID: u32 = 23;
-/// id for information_schema.TRIGGERS
+/// id for information_schema.TRIGGERS (for mysql)
 pub const INFORMATION_SCHEMA_TRIGGERS_TABLE_ID: u32 = 24;
 /// id for information_schema.GLOBAL_STATUS
 pub const INFORMATION_SCHEMA_GLOBAL_STATUS_TABLE_ID: u32 = 25;
--- a/src/common/config/Cargo.toml
+++ b/src/common/config/Cargo.toml
@@ -14,6 +14,7 @@ common-macro.workspace = true
 config.workspace = true
 humantime-serde.workspace = true
 num_cpus.workspace = true
+object-store.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 serde_with.workspace = true
--- a/src/common/config/src/config.rs
+++ b/src/common/config/src/config.rs
@@ -106,7 +106,7 @@ mod tests {
    use common_telemetry::logging::LoggingOptions;
    use common_test_util::temp_dir::create_named_temp_file;
    use common_wal::config::DatanodeWalConfig;
-    use datanode::config::{ObjectStoreConfig, StorageConfig};
+    use datanode::config::StorageConfig;
    use meta_client::MetaClientOptions;
    use serde::{Deserialize, Serialize};

@@ -212,7 +212,7 @@ mod tests {

                // Check the configs from environment variables.
                match &opts.storage.store {
-                    ObjectStoreConfig::S3(s3_config) => {
+                    object_store::config::ObjectStoreConfig::S3(s3_config) => {
                        assert_eq!(s3_config.bucket, "mybucket".to_string());
                    }
                    _ => panic!("unexpected store type"),
--- a/src/common/datasource/src/lib.rs
+++ b/src/common/datasource/src/lib.rs
@@ -21,6 +21,7 @@ pub mod error;
 pub mod file_format;
 pub mod lister;
 pub mod object_store;
+pub mod parquet_writer;
 pub mod share_buffer;
 #[cfg(test)]
 pub mod test_util;
--- a/src/common/datasource/src/object_store/oss.rs
+++ b/src/common/datasource/src/object_store/oss.rs
@@ -77,6 +77,11 @@ pub fn build_oss_backend(

    let op = ObjectStore::new(builder)
        .context(error::BuildBackendSnafu)?
+        .layer(
+            object_store::layers::RetryLayer::new()
+                .with_jitter()
+                .with_notify(object_store::util::PrintDetailedError),
+        )
        .layer(object_store::layers::LoggingLayer::default())
        .layer(object_store::layers::TracingLayer)
        .layer(object_store::layers::build_prometheus_metrics_layer(true))
--- a/src/common/datasource/src/object_store/s3.rs
+++ b/src/common/datasource/src/object_store/s3.rs
@@ -85,6 +85,11 @@ pub fn build_s3_backend(
    // TODO(weny): Consider finding a better way to eliminate duplicate code.
    Ok(ObjectStore::new(builder)
        .context(error::BuildBackendSnafu)?
+        .layer(
+            object_store::layers::RetryLayer::new()
+                .with_jitter()
+                .with_notify(object_store::util::PrintDetailedError),
+        )
        .layer(object_store::layers::LoggingLayer::new(
            DefaultLoggingInterceptor,
        ))
--- a/src/common/datasource/src/parquet_writer.rs
+++ b/src/common/datasource/src/parquet_writer.rs
@@ -0,0 +1,52 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use bytes::Bytes;
+use futures::future::BoxFuture;
+use object_store::Writer;
+use parquet::arrow::async_writer::AsyncFileWriter;
+use parquet::errors::ParquetError;
+
+/// Bridges opendal [Writer] with parquet [AsyncFileWriter].
+pub struct AsyncWriter {
+    inner: Writer,
+}
+
+impl AsyncWriter {
+    /// Create a [`AsyncWriter`] by given [`Writer`].
+    pub fn new(writer: Writer) -> Self {
+        Self { inner: writer }
+    }
+}
+
+impl AsyncFileWriter for AsyncWriter {
+    fn write(&mut self, bs: Bytes) -> BoxFuture<'_, parquet::errors::Result<()>> {
+        Box::pin(async move {
+            self.inner
+                .write(bs)
+                .await
+                .map_err(|err| ParquetError::External(Box::new(err)))
+        })
+    }
+
+    fn complete(&mut self) -> BoxFuture<'_, parquet::errors::Result<()>> {
+        Box::pin(async move {
+            self.inner
+                .close()
+                .await
+                .map(|_| ())
+                .map_err(|err| ParquetError::External(Box::new(err)))
+        })
+    }
+}
--- a/src/common/error/src/status_code.rs
+++ b/src/common/error/src/status_code.rs
@@ -119,6 +119,11 @@ pub enum StatusCode {
    FlowAlreadyExists = 8000,
    FlowNotFound = 8001,
    // ====== End of flow related status code =====
+
+    // ====== Begin of trigger related status code =====
+    TriggerAlreadyExists = 9000,
+    TriggerNotFound = 9001,
+    // ====== End of trigger related status code =====
 }

 impl StatusCode {
@@ -155,6 +160,8 @@ impl StatusCode {
            | StatusCode::RegionNotFound
            | StatusCode::FlowAlreadyExists
            | StatusCode::FlowNotFound
+            | StatusCode::TriggerAlreadyExists
+            | StatusCode::TriggerNotFound
            | StatusCode::RegionReadonly
            | StatusCode::TableColumnNotFound
            | StatusCode::TableColumnExists
@@ -198,6 +205,8 @@ impl StatusCode {
            | StatusCode::PlanQuery
            | StatusCode::FlowAlreadyExists
            | StatusCode::FlowNotFound
+            | StatusCode::TriggerAlreadyExists
+            | StatusCode::TriggerNotFound
            | StatusCode::RegionNotReady
            | StatusCode::RegionBusy
            | StatusCode::RegionReadonly
@@ -230,6 +239,48 @@ impl fmt::Display for StatusCode {
    }
 }

+/// Implements `From<tonic::Status>` for `$Error` by building its `$Variant` variant
+/// (which must have `code`, `msg`, `tonic_code` and `location` fields). The GreptimeDB
+/// error headers in the status metadata win over the plain tonic code and message.
+#[macro_export]
+macro_rules! define_from_tonic_status {
+    ($Error: ty, $Variant: ident) => {
+        impl From<tonic::Status> for $Error {
+            fn from(e: tonic::Status) -> Self {
+                use snafu::location;
+                // Resolve `StatusCode` via `$crate` so the caller does not have to import it.
+                use $crate::status_code::StatusCode;
+
+                fn metadata_value(e: &tonic::Status, key: &str) -> Option<String> {
+                    e.metadata()
+                        .get(key)
+                        .and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
+                }
+
+                let code = metadata_value(&e, $crate::GREPTIME_DB_HEADER_ERROR_CODE)
+                    .and_then(|s| s.parse::<u32>().ok())
+                    .and_then(StatusCode::from_u32)
+                    .unwrap_or_else(|| match e.code() {
+                        tonic::Code::Cancelled => StatusCode::Cancelled,
+                        tonic::Code::DeadlineExceeded => StatusCode::DeadlineExceeded,
+                        _ => StatusCode::Internal,
+                    });
+
+                let msg = metadata_value(&e, $crate::GREPTIME_DB_HEADER_ERROR_MSG)
+                    .unwrap_or_else(|| e.message().to_string());
+
+                // TODO(LFC): Make the error variant defined automatically.
+                Self::$Variant {
+                    code,
+                    msg,
+                    tonic_code: e.code(),
+                    location: location!(),
+                }
+            }
+        }
+    };
+}
+
 #[macro_export]
 macro_rules! define_into_tonic_status {
    ($Error: ty) => {
@@ -281,12 +332,14 @@ pub fn status_to_tonic_code(status_code: StatusCode) -> Code {
        | StatusCode::TableColumnExists
        | StatusCode::RegionAlreadyExists
        | StatusCode::DatabaseAlreadyExists
+        | StatusCode::TriggerAlreadyExists
        | StatusCode::FlowAlreadyExists => Code::AlreadyExists,
        StatusCode::TableNotFound
        | StatusCode::RegionNotFound
        | StatusCode::TableColumnNotFound
        | StatusCode::DatabaseNotFound
        | StatusCode::UserNotFound
+        | StatusCode::TriggerNotFound
        | StatusCode::FlowNotFound => Code::NotFound,
        StatusCode::TableUnavailable
        | StatusCode::StorageUnavailable
@@ -304,15 +357,6 @@ pub fn status_to_tonic_code(status_code: StatusCode) -> Code {
    }
 }

-/// Converts tonic [Code] to [StatusCode].
-pub fn convert_tonic_code_to_status_code(code: Code) -> StatusCode {
-    match code {
-        Code::Cancelled => StatusCode::Cancelled,
-        Code::DeadlineExceeded => StatusCode::DeadlineExceeded,
-        _ => StatusCode::Internal,
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use strum::IntoEnumIterator;
--- a/src/common/error/tests/ext.rs
+++ b/src/common/error/tests/ext.rs
@@ -84,24 +84,33 @@ fn test_to_string() {
    assert_eq!(result.unwrap_err().to_string(), "<root cause>");
 }

+fn normalize_path(s: &str) -> String {
+    s.replace('\\', "/")
+}
+
 #[test]
 fn test_debug_format() {
    let result = normal_error();
    let debug_output = format!("{:?}", result.unwrap_err());
-    let normalized_output = debug_output.replace('\\', "/");
+
    assert_eq!(
-        normalized_output,
-        r#"0: A normal error with "display" attribute, message "blabla", at src/common/error/tests/ext.rs:55:22
-1: PlainError { msg: "<root cause>", status_code: Unexpected }"#
+        normalize_path(&debug_output),
+        format!(
+            r#"0: A normal error with "display" attribute, message "blabla", at {}:55:22
+1: PlainError {{ msg: "<root cause>", status_code: Unexpected }}"#,
+            normalize_path(file!())
+        )
    );

    let result = transparent_error();
    let debug_output = format!("{:?}", result.unwrap_err());
-    let normalized_output = debug_output.replace('\\', "/");
    assert_eq!(
-        normalized_output,
-        r#"0: <transparent>, at src/common/error/tests/ext.rs:60:5
-1: PlainError { msg: "<root cause>", status_code: Unexpected }"#
+        normalize_path(&debug_output),
+        format!(
+            r#"0: <transparent>, at {}:60:5
+1: PlainError {{ msg: "<root cause>", status_code: Unexpected }}"#,
+            normalize_path(file!())
+        )
    );
 }

--- a/src/common/event-recorder/Cargo.toml
+++ b/src/common/event-recorder/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "common-event-recorder"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+api.workspace = true
+async-trait.workspace = true
+backon.workspace = true
+client.workspace = true
+common-error.workspace = true
+common-macro.workspace = true
+common-meta.workspace = true
+common-telemetry.workspace = true
+common-time.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+snafu.workspace = true
+store-api.workspace = true
+tokio.workspace = true
+tokio-util.workspace = true
+
+[lints]
+workspace = true
--- a/src/common/event-recorder/src/error.rs
+++ b/src/common/event-recorder/src/error.rs
@@ -0,0 +1,53 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use api::v1::ColumnSchema;
+use common_error::ext::ErrorExt;
+use common_error::status_code::StatusCode;
+use common_macro::stack_trace_debug;
+use snafu::{Location, Snafu};
+
+#[derive(Snafu)]
+#[snafu(visibility(pub))]
+#[stack_trace_debug]
+pub enum Error {
+    #[snafu(display("No available frontend"))]
+    NoAvailableFrontend {
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Mismatched schema, expected: {:?}, actual: {:?}", expected, actual))]
+    MismatchedSchema {
+        #[snafu(implicit)]
+        location: Location,
+        expected: Vec<ColumnSchema>,
+        actual: Vec<ColumnSchema>,
+    },
+}
+
+pub type Result<T> = std::result::Result<T, Error>;
+
+impl ErrorExt for Error {
+    fn status_code(&self) -> StatusCode {
+        match self {
+            Error::MismatchedSchema { .. } => StatusCode::InvalidArguments,
+            Error::NoAvailableFrontend { .. } => StatusCode::Internal,
+        }
+    }
+
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+}
--- a/src/common/event-recorder/src/lib.rs
+++ b/src/common/event-recorder/src/lib.rs
@@ -0,0 +1,18 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+pub mod error;
+pub mod recorder;
+
+pub use recorder::*;
--- a/src/common/event-recorder/src/recorder.rs
+++ b/src/common/event-recorder/src/recorder.rs
@@ -0,0 +1,527 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::any::Any;
+use std::collections::HashMap;
+use std::fmt::Debug;
+use std::sync::{Arc, OnceLock};
+use std::time::Duration;
+
+use api::v1::column_data_type_extension::TypeExt;
+use api::v1::value::ValueData;
+use api::v1::{
+    ColumnDataType, ColumnDataTypeExtension, ColumnSchema, JsonTypeExtension, Row,
+    RowInsertRequest, RowInsertRequests, Rows, SemanticType,
+};
+use async_trait::async_trait;
+use backon::{BackoffBuilder, ExponentialBuilder};
+use common_telemetry::{debug, error, info, warn};
+use common_time::timestamp::{TimeUnit, Timestamp};
+use serde::{Deserialize, Serialize};
+use store_api::mito_engine_options::{APPEND_MODE_KEY, TTL_KEY};
+use tokio::sync::mpsc::{channel, Receiver, Sender};
+use tokio::task::JoinHandle;
+use tokio::time::sleep;
+use tokio_util::sync::CancellationToken;
+
+use crate::error::{MismatchedSchemaSnafu, Result};
+
+/// The default table name for storing the events.
+pub const DEFAULT_EVENTS_TABLE_NAME: &str = "events";
+
+/// The column name for the event type.
+pub const EVENTS_TABLE_TYPE_COLUMN_NAME: &str = "type";
+/// The column name for the event payload.
+pub const EVENTS_TABLE_PAYLOAD_COLUMN_NAME: &str = "payload";
+/// The column name for the event timestamp.
+pub const EVENTS_TABLE_TIMESTAMP_COLUMN_NAME: &str = "timestamp";
+
+/// EventRecorderRef is the reference to the event recorder.
+pub type EventRecorderRef = Arc<dyn EventRecorder>;
+
+static EVENTS_TABLE_TTL: OnceLock<String> = OnceLock::new();
+
+/// The time interval for flushing batched events to the event handler.
+pub const DEFAULT_FLUSH_INTERVAL_SECONDS: Duration = Duration::from_secs(5);
+// The default TTL for the events table.
+const DEFAULT_EVENTS_TABLE_TTL: &str = "30d";
+// The capacity of the tokio channel for transmitting events to background processor.
+const DEFAULT_CHANNEL_SIZE: usize = 2048;
+// The size of the buffer for batching events before flushing to event handler.
+const DEFAULT_BUFFER_SIZE: usize = 100;
+// The maximum number of retry attempts when event handler processing fails.
+const DEFAULT_MAX_RETRY_TIMES: u64 = 3;
+
+/// Event trait defines the interface for events that can be recorded and persisted as the system table.
+/// By default, the event will be persisted as the system table with the following schema:
+///
+/// - `type`: the type of the event.
+/// - `payload`: the JSON payload of the event.
+/// - `timestamp`: the timestamp of the event.
+///
+/// The event can also add the extra schema and row to the event by overriding the `extra_schema` and `extra_row` methods.
+pub trait Event: Send + Sync + Debug {
+    /// Returns the type of the event.
+    fn event_type(&self) -> &str;
+
+    /// Returns the timestamp of the event. Defaults to the current time in nanoseconds.
+    fn timestamp(&self) -> Timestamp {
+        Timestamp::current_time(TimeUnit::Nanosecond)
+    }
+
+    /// Returns the JSON string of the event as the payload. It is stored in a JSON-typed column.
+    fn json_payload(&self) -> Result<String>;
+
+    /// Returns the extra columns appended after the default schema. Defaults to none.
+    fn extra_schema(&self) -> Vec<ColumnSchema> {
+        vec![]
+    }
+
+    /// Returns the extra values appended after the default row values. Defaults to none.
+    fn extra_row(&self) -> Result<Row> {
+        Ok(Row { values: vec![] })
+    }
+
+    /// Returns the event as [`Any`], so that it can be downcast to the concrete event type.
+    fn as_any(&self) -> &dyn Any;
+}
+
+/// Returns the hints for the insert operation.
+pub fn insert_hints() -> Vec<(&'static str, &'static str)> {
+    vec![
+        (
+            TTL_KEY,
+            EVENTS_TABLE_TTL
+                .get()
+                .map(|s| s.as_str())
+                .unwrap_or(DEFAULT_EVENTS_TABLE_TTL),
+        ),
+        (APPEND_MODE_KEY, "true"),
+    ]
+}
+
+/// Builds the row inserts request for the events that will be persisted to the events table.
+pub fn build_row_inserts_request(events: &[Box<dyn Event>]) -> Result<RowInsertRequests> {
+    // Aggregate the events by the event type.
+    let mut event_groups: HashMap<&str, Vec<&Box<dyn Event>>> = HashMap::new();
+
+    for event in events {
+        event_groups
+            .entry(event.event_type())
+            .or_default()
+            .push(event);
+    }
+
+    let mut row_insert_requests = RowInsertRequests {
+        inserts: Vec::with_capacity(event_groups.len()),
+    };
+
+    for (_, events) in event_groups {
+        validate_events(&events)?;
+
+        // We already validated the events, so it's safe to get the first event to build the schema for the RowInsertRequest.
+        let event = &events[0];
+        let mut schema = vec![
+            ColumnSchema {
+                column_name: EVENTS_TABLE_TYPE_COLUMN_NAME.to_string(),
+                datatype: ColumnDataType::String.into(),
+                semantic_type: SemanticType::Tag.into(),
+                ..Default::default()
+            },
+            ColumnSchema {
+                column_name: EVENTS_TABLE_PAYLOAD_COLUMN_NAME.to_string(),
+                datatype: ColumnDataType::Binary as i32,
+                semantic_type: SemanticType::Field as i32,
+                datatype_extension: Some(ColumnDataTypeExtension {
+                    type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
+                }),
+                ..Default::default()
+            },
+            ColumnSchema {
+                column_name: EVENTS_TABLE_TIMESTAMP_COLUMN_NAME.to_string(),
+                datatype: ColumnDataType::TimestampNanosecond.into(),
+                semantic_type: SemanticType::Timestamp.into(),
+                ..Default::default()
+            },
+        ];
+        schema.extend(event.extra_schema());
+
+        let rows = events
+            .iter()
+            .map(|event| {
+                let mut row = Row {
+                    values: vec![
+                        ValueData::StringValue(event.event_type().to_string()).into(),
+                        ValueData::BinaryValue(event.json_payload()?.as_bytes().to_vec()).into(),
+                        ValueData::TimestampNanosecondValue(event.timestamp().value()).into(),
+                    ],
+                };
+                row.values.extend(event.extra_row()?.values);
+                Ok(row)
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        row_insert_requests.inserts.push(RowInsertRequest {
+            table_name: DEFAULT_EVENTS_TABLE_NAME.to_string(),
+            rows: Some(Rows { schema, rows }),
+        });
+    }
+
+    Ok(row_insert_requests)
+}
+
+// Ensure the events with the same event type have the same extra schema.
+#[allow(clippy::borrowed_box)]
+fn validate_events(events: &[&Box<dyn Event>]) -> Result<()> {
+    // An empty group has nothing to compare, so it is trivially valid.
+    let Some(first) = events.first() else {
+        return Ok(());
+    };
+    let expected = first.extra_schema();
+    for event in events {
+        let actual = event.extra_schema();
+        if actual != expected {
+            return MismatchedSchemaSnafu { expected, actual }.fail();
+        }
+    }
+    Ok(())
+}
+
+/// EventRecorder trait defines the interface for recording events.
+pub trait EventRecorder: Send + Sync + 'static {
+    /// Records an event for persistence and processing by [EventHandler].
+    fn record(&self, event: Box<dyn Event>);
+
+    /// Cancels the event recorder.
+    fn close(&self);
+}
+
+/// EventHandler trait defines the interface for how to handle the event.
+#[async_trait]
+pub trait EventHandler: Send + Sync + 'static {
+    /// Processes and handles incoming events. The [DefaultEventHandlerImpl] implementation forwards events to frontend instances for persistence.
+    /// We use `&[Box<dyn Event>]` to avoid consuming the events, so the caller can buffer the events and retry if the handler fails.
+    async fn handle(&self, events: &[Box<dyn Event>]) -> Result<()>;
+}
+
+/// Configuration options for the event recorder.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct EventRecorderOptions {
+    /// TTL for the events table that will be used to store the events.
+    pub ttl: String,
+}
+
+impl Default for EventRecorderOptions {
+    fn default() -> Self {
+        Self {
+            ttl: DEFAULT_EVENTS_TABLE_TTL.to_string(),
+        }
+    }
+}
+
+/// Implementation of [EventRecorder] that records the events and processes them in the background by the [EventHandler].
+pub struct EventRecorderImpl {
+    // The channel to send the events to the background processor.
+    tx: Sender<Box<dyn Event>>,
+    // The cancel token to cancel the background processor.
+    cancel_token: CancellationToken,
+    // The background processor to process the events.
+    handle: Option<JoinHandle<()>>,
+}
+
+impl EventRecorderImpl {
+    /// Creates the recorder and spawns the background task that batches the recorded
+    /// events and flushes them to `event_handler`. `opts.ttl` is stored as the global TTL
+    /// of the events table; only the first value ever set takes effect.
+    pub fn new(event_handler: Box<dyn EventHandler>, opts: EventRecorderOptions) -> Self {
+        info!("Creating event recorder with options: {:?}", opts);
+
+        // Only the first TTL wins: log both the effective one and the one being ignored.
+        if let Err(ignored) = EVENTS_TABLE_TTL.set(opts.ttl) {
+            info!(
+                "Events table ttl already set to {}, skip setting it to {}",
+                EVENTS_TABLE_TTL.get().map(String::as_str).unwrap_or_default(),
+                ignored
+            );
+        }
+
+        let (tx, rx) = channel(DEFAULT_CHANNEL_SIZE);
+        let cancel_token = CancellationToken::new();
+
+        let processor = EventProcessor::new(
+            rx,
+            event_handler,
+            DEFAULT_FLUSH_INTERVAL_SECONDS,
+            DEFAULT_MAX_RETRY_TIMES,
+        )
+        .with_cancel_token(cancel_token.clone());
+
+        // Spawn a background task to process the events.
+        let handle = tokio::spawn(async move {
+            processor.process(DEFAULT_BUFFER_SIZE).await;
+        });
+
+        Self {
+            tx,
+            cancel_token,
+            handle: Some(handle),
+        }
+    }
+}
+
+impl EventRecorder for EventRecorderImpl {
+    // Accepts an event and send it to the background handler.
+    fn record(&self, event: Box<dyn Event>) {
+        if let Err(e) = self.tx.try_send(event) {
+            error!("Failed to send event to the background processor: {}", e);
+        }
+    }
+
+    // Closes the event recorder. It will stop the background processor and flush the buffer.
+    fn close(&self) {
+        self.cancel_token.cancel();
+    }
+}
+
+impl Drop for EventRecorderImpl {
+    fn drop(&mut self) {
+        if let Some(handle) = self.handle.take() {
+            handle.abort();
+            info!("Aborted the background processor in event recorder");
+        }
+    }
+}
+
+struct EventProcessor {
+    rx: Receiver<Box<dyn Event>>,
+    event_handler: Box<dyn EventHandler>,
+    max_retry_times: u64,
+    process_interval: Duration,
+    cancel_token: CancellationToken,
+}
+
+impl EventProcessor {
+    fn new(
+        rx: Receiver<Box<dyn Event>>,
+        event_handler: Box<dyn EventHandler>,
+        process_interval: Duration,
+        max_retry_times: u64,
+    ) -> Self {
+        Self {
+            rx,
+            event_handler,
+            max_retry_times,
+            process_interval,
+            cancel_token: CancellationToken::new(),
+        }
+    }
+
+    fn with_cancel_token(mut self, cancel_token: CancellationToken) -> Self {
+        self.cancel_token = cancel_token;
+        self
+    }
+
+    async fn process(mut self, buffer_size: usize) {
+        info!("Start the background processor in event recorder to handle the received events.");
+
+        let mut buffer = Vec::with_capacity(buffer_size);
+        let mut interval = tokio::time::interval(self.process_interval);
+
+        loop {
+            tokio::select! {
+                maybe_event = self.rx.recv() => {
+                    if let Some(maybe_event) = maybe_event {
+                        debug!("Received event: {:?}", maybe_event);
+
+                        if buffer.len() >= buffer_size {
+                            debug!(
+                                "Flushing events to the event handler because the buffer is full with {} events",
+                                buffer.len()
+                            );
+                            self.flush_events_to_handler(&mut buffer).await;
+                        }
+
+                        // Push the event to the buffer. It is flushed when the buffer is full, the interval ticks, or the processor stops.
+                        buffer.push(maybe_event);
+                    } else {
+                        // `recv` returned `None`, i.e. the channel is closed: flush the buffered events and exit the loop.
+                        self.flush_events_to_handler(&mut buffer).await;
+                        break;
+                    }
+                }
+                // Cancel the processor through the cancel token.
+                _ = self.cancel_token.cancelled() => {
+                    warn!("Received a cancel signal, flushing the buffer and exiting the loop");
+                    self.flush_events_to_handler(&mut buffer).await;
+                    break;
+                }
+                // When the interval is triggered, flush the buffer and send the events to the event handler.
+                _ = interval.tick() => {
+                    self.flush_events_to_handler(&mut buffer).await;
+                }
+            }
+        }
+    }
+
+    // NOTE: While we implement a retry mechanism for failed event handling, there is no guarantee that all events will be processed successfully.
+    async fn flush_events_to_handler(&self, buffer: &mut Vec<Box<dyn Event>>) {
+        if !buffer.is_empty() {
+            debug!("Flushing {} events to the event handler", buffer.len());
+
+            let mut backoff = ExponentialBuilder::default()
+                .with_min_delay(Duration::from_millis(
+                    DEFAULT_FLUSH_INTERVAL_SECONDS.as_millis() as u64 / self.max_retry_times.max(1),
+                ))
+                .with_max_delay(Duration::from_millis(
+                    DEFAULT_FLUSH_INTERVAL_SECONDS.as_millis() as u64,
+                ))
+                .with_max_times(self.max_retry_times as usize)
+                .build();
+
+            loop {
+                match self.event_handler.handle(buffer).await {
+                    Ok(()) => {
+                        debug!("Successfully handled {} events", buffer.len());
+                        break;
+                    }
+                    Err(e) => {
+                        if let Some(d) = backoff.next() {
+                            warn!(e; "Failed to handle events, retrying...");
+                            sleep(d).await;
+                            continue;
+                        } else {
+                            warn!(
+                                e; "Failed to handle events after {} retries",
+                                self.max_retry_times
+                            );
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+
+        // Clear the buffer to prevent unbounded memory growth, regardless of whether event processing succeeded or failed.
+        buffer.clear();
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[derive(Debug)]
+    struct TestEvent {}
+
+    impl Event for TestEvent {
+        fn event_type(&self) -> &str {
+            "test_event"
+        }
+
+        fn json_payload(&self) -> Result<String> {
+            Ok("{\"procedure_id\": \"1234567890\"}".to_string())
+        }
+
+        fn as_any(&self) -> &dyn Any {
+            self
+        }
+    }
+
+    struct TestEventHandlerImpl {}
+
+    #[async_trait]
+    impl EventHandler for TestEventHandlerImpl {
+        async fn handle(&self, events: &[Box<dyn Event>]) -> Result<()> {
+            let event = events
+                .first()
+                .unwrap()
+                .as_any()
+                .downcast_ref::<TestEvent>()
+                .unwrap();
+            assert_eq!(
+                event.json_payload().unwrap(),
+                "{\"procedure_id\": \"1234567890\"}"
+            );
+            assert_eq!(event.event_type(), "test_event");
+            Ok(())
+        }
+    }
+
+    #[tokio::test]
+    async fn test_event_recorder() {
+        let mut event_recorder = EventRecorderImpl::new(
+            Box::new(TestEventHandlerImpl {}),
+            EventRecorderOptions::default(),
+        );
+        event_recorder.record(Box::new(TestEvent {}));
+
+        // Sleep for a while to let the event be sent to the event handler.
+        sleep(Duration::from_millis(500)).await;
+
+        // Close the event recorder to flush the buffer.
+        event_recorder.close();
+
+        // Sleep for a while to let the background task process the event.
+        sleep(Duration::from_millis(500)).await;
+
+        if let Some(handle) = event_recorder.handle.take() {
+            assert!(handle.await.is_ok());
+        }
+    }
+
+    struct TestEventHandlerImplShouldPanic {}
+
+    #[async_trait]
+    impl EventHandler for TestEventHandlerImplShouldPanic {
+        async fn handle(&self, events: &[Box<dyn Event>]) -> Result<()> {
+            let event = events
+                .first()
+                .unwrap()
+                .as_any()
+                .downcast_ref::<TestEvent>()
+                .unwrap();
+
+            // Set the incorrect payload and event type to trigger the panic.
+            assert_eq!(
+                event.json_payload().unwrap(),
+                "{\"procedure_id\": \"should_panic\"}"
+            );
+            assert_eq!(event.event_type(), "should_panic");
+            Ok(())
+        }
+    }
+
+    #[tokio::test]
+    async fn test_event_recorder_should_panic() {
+        let mut event_recorder = EventRecorderImpl::new(
+            Box::new(TestEventHandlerImplShouldPanic {}),
+            EventRecorderOptions::default(),
+        );
+
+        event_recorder.record(Box::new(TestEvent {}));
+
+        // Sleep for a while to let the event be sent to the event handler.
+        sleep(Duration::from_millis(500)).await;
+
+        // Close the event recorder to flush the buffer.
+        event_recorder.close();
+
+        // Sleep for a while to let the background task process the event.
+        sleep(Duration::from_millis(500)).await;
+
+        if let Some(handle) = event_recorder.handle.take() {
+            assert!(handle.await.unwrap_err().is_panic());
+        }
+    }
+}
--- a/src/common/frontend/src/lib.rs
+++ b/src/common/frontend/src/lib.rs
@@ -23,7 +23,7 @@ pub mod selector;
 #[derive(Debug, Clone, Eq, PartialEq)]
 pub struct DisplayProcessId {
    pub server_addr: String,
-    pub id: u64,
+    pub id: u32,
 }

 impl Display for DisplayProcessId {
@@ -44,7 +44,7 @@ impl TryFrom<&str> for DisplayProcessId {
        let id = split
            .next()
            .context(error::ParseProcessIdSnafu { s: value })?;
-        let id = u64::from_str(id)
+        let id = u32::from_str(id)
            .ok()
            .context(error::ParseProcessIdSnafu { s: value })?;
        Ok(DisplayProcessId { server_addr, id })
--- a/src/common/frontend/src/selector.rs
+++ b/src/common/frontend/src/selector.rs
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::fmt::Debug;
 use std::time::Duration;

 use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
@@ -30,7 +31,7 @@ use crate::error::{MetaSnafu, Result};
 pub type FrontendClientPtr = Box<dyn FrontendClient>;

 #[async_trait::async_trait]
-pub trait FrontendClient: Send {
+pub trait FrontendClient: Send + Debug {
    async fn list_process(&mut self, req: ListProcessRequest) -> Result<ListProcessResponse>;

    async fn kill_process(&mut self, req: KillProcessRequest) -> Result<KillProcessResponse>;
--- a/src/common/function/Cargo.toml
+++ b/src/common/function/Cargo.toml
@@ -16,6 +16,8 @@ geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"]
 ahash.workspace = true
 api.workspace = true
 arc-swap = "1.0"
+arrow.workspace = true
+arrow-schema.workspace = true
 async-trait.workspace = true
 bincode = "1.3"
 catalog.workspace = true
@@ -33,6 +35,8 @@ common-version.workspace = true
 datafusion.workspace = true
 datafusion-common.workspace = true
 datafusion-expr.workspace = true
+datafusion-functions-aggregate-common.workspace = true
+datafusion-physical-expr.workspace = true
 datatypes.workspace = true
 derive_more = { version = "1", default-features = false, features = ["display"] }
 geo = { version = "0.29", optional = true }
@@ -61,5 +65,7 @@ wkt = { version = "0.11", optional = true }

 [dev-dependencies]
 approx = "0.5"
+futures.workspace = true
+pretty_assertions = "1.4.0"
 serde = { version = "1.0", features = ["derive"] }
 tokio.workspace = true
--- a/src/common/function/src/aggrs.rs
+++ b/src/common/function/src/aggrs.rs
@@ -13,6 +13,9 @@
 // limitations under the License.

 pub mod approximate;
+pub mod count_hash;
 #[cfg(feature = "geo")]
 pub mod geo;
 pub mod vector;
+
+pub mod aggr_wrapper;
--- a/src/common/function/src/aggrs/aggr_wrapper.rs
+++ b/src/common/function/src/aggrs/aggr_wrapper.rs
@@ -0,0 +1,538 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Wrapper for making aggregate functions out of state/merge functions of original aggregate functions.
+//!
+//! i.e. for an aggregate function `foo`, we will have a state function `foo_state` and a merge function `foo_merge`.
+//!
+//! `foo_state`'s input args is the same as `foo`'s, and its output is a state object.
+//! Note that `foo_state` might have multiple state columns, so its output is a struct array
+//! in which each state column is a struct field.
+//! `foo_merge`'s input arg is the same as `foo_state`'s output, and its output is the same as `foo`'s output.
+//!
+
+use std::sync::Arc;
+
+use arrow::array::StructArray;
+use arrow_schema::Fields;
+use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
+use datafusion::optimizer::AnalyzerRule;
+use datafusion::physical_planner::create_aggregate_expr_and_maybe_filter;
+use datafusion_common::{Column, ScalarValue};
+use datafusion_expr::expr::AggregateFunction;
+use datafusion_expr::function::StateFieldsArgs;
+use datafusion_expr::{
+    Accumulator, Aggregate, AggregateUDF, AggregateUDFImpl, Expr, ExprSchemable, LogicalPlan,
+    Signature,
+};
+use datafusion_physical_expr::aggregate::AggregateFunctionExpr;
+use datatypes::arrow::datatypes::{DataType, Field};
+
+/// Returns the name of the state function for the given aggregate function name.
+/// The state function is used to compute the state of the aggregate function.
+/// The state function's name is in the format `__<aggr_name>_state`.
+pub fn aggr_state_func_name(aggr_name: &str) -> String {
+    format!("__{}_state", aggr_name)
+}
+
+/// Returns the name of the merge function for the given aggregate function name.
+/// The merge function is used to merge the states of the state functions.
+/// The merge function's name is in the format `__<aggr_name>_merge`.
+pub fn aggr_merge_func_name(aggr_name: &str) -> String {
+    format!("__{}_merge", aggr_name)
+}
+
+/// A helper to make aggregate functions out of the state and merge functions of the original aggregate function.
+/// It is a unit struct that holds no data; the wrapping itself is done by `StateWrapper` and `MergeWrapper`.
+///
+/// Note that a state function may have multiple output columns, so its return type is always a struct array, and the merge function is used to merge the states of the state functions.
+#[derive(Debug, Clone)]
+pub struct StateMergeHelper;
+
+/// A struct to hold the two aggregate plans, one for the state function(lower) and one for the merge function(upper).
+#[allow(unused)]
+#[derive(Debug, Clone)]
+pub struct StepAggrPlan {
+    /// Upper merge plan, which is the aggregate plan that merges the states of the state function.
+    pub upper_merge: Arc<LogicalPlan>,
+    /// Lower state plan, which is the aggregate plan that computes the state of the aggregate function.
+    pub lower_state: Arc<LogicalPlan>,
+}
+
+pub fn get_aggr_func(expr: &Expr) -> Option<&datafusion_expr::expr::AggregateFunction> {
+    let mut expr_ref = expr;
+    while let Expr::Alias(alias) = expr_ref {
+        expr_ref = &alias.expr;
+    }
+    if let Expr::AggregateFunction(aggr_func) = expr_ref {
+        Some(aggr_func)
+    } else {
+        None
+    }
+}
+
+impl StateMergeHelper {
+    /// Split an aggregate plan into two aggregate plans, one for the state function and one for the merge function.
+    pub fn split_aggr_node(aggr_plan: Aggregate) -> datafusion_common::Result<StepAggrPlan> {
+        let aggr = {
+            // certain aggr func need type coercion to work correctly, so we need to analyze the plan first.
+            let aggr_plan = TypeCoercion::new().analyze(
+                LogicalPlan::Aggregate(aggr_plan).clone(),
+                &Default::default(),
+            )?;
+            if let LogicalPlan::Aggregate(aggr) = aggr_plan {
+                aggr
+            } else {
+                return Err(datafusion_common::DataFusionError::Internal(format!(
+                    "Failed to coerce expressions in aggregate plan, expected Aggregate, got: {:?}",
+                    aggr_plan
+                )));
+            }
+        };
+        let mut lower_aggr_exprs = vec![];
+        let mut upper_aggr_exprs = vec![];
+
+        for aggr_expr in aggr.aggr_expr.iter() {
+            let Some(aggr_func) = get_aggr_func(aggr_expr) else {
+                return Err(datafusion_common::DataFusionError::NotImplemented(format!(
+                    "Unsupported aggregate expression for step aggr optimize: {:?}",
+                    aggr_expr
+                )));
+            };
+
+            let original_input_types = aggr_func
+                .args
+                .iter()
+                .map(|e| e.get_type(&aggr.input.schema()))
+                .collect::<Result<Vec<_>, _>>()?;
+
+            // first create the state function from the original aggregate function.
+            let state_func = StateWrapper::new((*aggr_func.func).clone())?;
+
+            let expr = AggregateFunction {
+                func: Arc::new(state_func.into()),
+                args: aggr_func.args.clone(),
+                distinct: aggr_func.distinct,
+                filter: aggr_func.filter.clone(),
+                order_by: aggr_func.order_by.clone(),
+                null_treatment: aggr_func.null_treatment,
+            };
+            let expr = Expr::AggregateFunction(expr);
+            let lower_state_output_col_name = expr.schema_name().to_string();
+
+            lower_aggr_exprs.push(expr);
+
+            let (original_phy_expr, _filter, _ordering) = create_aggregate_expr_and_maybe_filter(
+                aggr_expr,
+                aggr.input.schema(),
+                aggr.input.schema().as_arrow(),
+                &Default::default(),
+            )?;
+
+            let merge_func = MergeWrapper::new(
+                (*aggr_func.func).clone(),
+                original_phy_expr,
+                original_input_types,
+            )?;
+            let arg = Expr::Column(Column::new_unqualified(lower_state_output_col_name));
+            let expr = AggregateFunction {
+                func: Arc::new(merge_func.into()),
+                args: vec![arg],
+                distinct: aggr_func.distinct,
+                filter: aggr_func.filter.clone(),
+                order_by: aggr_func.order_by.clone(),
+                null_treatment: aggr_func.null_treatment,
+            };
+
+            // alias to the original aggregate expr's schema name, so parent plan can refer to it
+            // correctly.
+            let expr = Expr::AggregateFunction(expr).alias(aggr_expr.schema_name().to_string());
+            upper_aggr_exprs.push(expr);
+        }
+
+        let mut lower = aggr.clone();
+        lower.aggr_expr = lower_aggr_exprs;
+        let lower_plan = LogicalPlan::Aggregate(lower);
+
+        // update aggregate's output schema
+        let lower_plan = Arc::new(lower_plan.recompute_schema()?);
+
+        let mut upper = aggr.clone();
+        let aggr_plan = LogicalPlan::Aggregate(aggr);
+        upper.aggr_expr = upper_aggr_exprs;
+        upper.input = lower_plan.clone();
+        // upper plan's output schema should be the same as the original aggregate plan's output schema
+        let upper_check = upper.clone();
+        let upper_plan = Arc::new(LogicalPlan::Aggregate(upper_check).recompute_schema()?);
+        if *upper_plan.schema() != *aggr_plan.schema() {
+            return Err(datafusion_common::DataFusionError::Internal(format!(
+                 "Upper aggregate plan's schema is not the same as the original aggregate plan's schema: \n[transformed]:{}\n[   original]{}",
+                upper_plan.schema(), aggr_plan.schema()
+            )));
+        }
+
+        Ok(StepAggrPlan {
+            lower_state: lower_plan,
+            upper_merge: upper_plan,
+        })
+    }
+}
+
+/// Wrapper to make an aggregate function out of a state function.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct StateWrapper {
+    inner: AggregateUDF,
+    name: String,
+}
+
+impl StateWrapper {
+    /// Wraps `inner` as a state function whose name is given by `aggr_state_func_name`.
+    pub fn new(inner: AggregateUDF) -> datafusion_common::Result<Self> {
+        let name = aggr_state_func_name(inner.name());
+        Ok(Self { inner, name })
+    }
+
+    pub fn inner(&self) -> &AggregateUDF {
+        &self.inner
+    }
+
+    /// Deduce the return type of the original aggregate function
+    /// based on the accumulator arguments.
+    ///
+    pub fn deduce_aggr_return_type(
+        &self,
+        acc_args: &datafusion_expr::function::AccumulatorArgs,
+    ) -> datafusion_common::Result<DataType> {
+        let input_exprs = acc_args.exprs;
+        let input_schema = acc_args.schema;
+        let input_types = input_exprs
+            .iter()
+            .map(|e| e.data_type(input_schema))
+            .collect::<Result<Vec<_>, _>>()?;
+        let return_type = self.inner.return_type(&input_types)?;
+        Ok(return_type)
+    }
+}
+
+impl AggregateUDFImpl for StateWrapper {
+    fn accumulator<'a, 'b>(
+        &'a self,
+        acc_args: datafusion_expr::function::AccumulatorArgs<'b>,
+    ) -> datafusion_common::Result<Box<dyn Accumulator>> {
+        // fix and recover proper acc args for the original aggregate function.
+        let state_type = acc_args.return_type.clone();
+        let inner = {
+            let old_return_type = self.deduce_aggr_return_type(&acc_args)?;
+            let acc_args = datafusion_expr::function::AccumulatorArgs {
+                return_type: &old_return_type,
+                schema: acc_args.schema,
+                ignore_nulls: acc_args.ignore_nulls,
+                ordering_req: acc_args.ordering_req,
+                is_reversed: acc_args.is_reversed,
+                name: acc_args.name,
+                is_distinct: acc_args.is_distinct,
+                exprs: acc_args.exprs,
+            };
+            self.inner.accumulator(acc_args)?
+        };
+        Ok(Box::new(StateAccum::new(inner, state_type)?))
+    }
+
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+    fn name(&self) -> &str {
+        self.name.as_str()
+    }
+
+    fn is_nullable(&self) -> bool {
+        self.inner.is_nullable()
+    }
+
+    /// Return state_fields as the output struct type.
+    ///
+    fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
+        let old_return_type = self.inner.return_type(arg_types)?;
+        let state_fields_args = StateFieldsArgs {
+            name: self.inner().name(),
+            input_types: arg_types,
+            return_type: &old_return_type,
+            // TODO(discord9): how to get this?, probably ok?
+            ordering_fields: &[],
+            is_distinct: false,
+        };
+        let state_fields = self.inner.state_fields(state_fields_args)?;
+        let struct_field = DataType::Struct(state_fields.into());
+        Ok(struct_field)
+    }
+
+    /// The state function's output fields are the same as the original aggregate function's state fields.
+    fn state_fields(
+        &self,
+        args: datafusion_expr::function::StateFieldsArgs,
+    ) -> datafusion_common::Result<Vec<Field>> {
+        let old_return_type = self.inner.return_type(args.input_types)?;
+        let state_fields_args = StateFieldsArgs {
+            name: args.name,
+            input_types: args.input_types,
+            return_type: &old_return_type,
+            ordering_fields: args.ordering_fields,
+            is_distinct: args.is_distinct,
+        };
+        self.inner.state_fields(state_fields_args)
+    }
+
+    /// The state function's signature is the same as the original aggregate function's signature.
+    fn signature(&self) -> &Signature {
+        self.inner.signature()
+    }
+
+    /// Coerce types the same way as the original aggregate function, as the input args are the same.
+    fn coerce_types(&self, arg_types: &[DataType]) -> datafusion_common::Result<Vec<DataType>> {
+        self.inner.coerce_types(arg_types)
+    }
+}
+
+/// The wrapper's input is the same as the original aggregate function's input,
+/// and the output is the state function's output.
+#[derive(Debug)]
+pub struct StateAccum {
+    inner: Box<dyn Accumulator>,
+    state_fields: Fields,
+}
+
+impl StateAccum {
+    pub fn new(
+        inner: Box<dyn Accumulator>,
+        state_type: DataType,
+    ) -> datafusion_common::Result<Self> {
+        let DataType::Struct(fields) = state_type else {
+            return Err(datafusion_common::DataFusionError::Internal(format!(
+                "Expected a struct type for state, got: {:?}",
+                state_type
+            )));
+        };
+        Ok(Self {
+            inner,
+            state_fields: fields,
+        })
+    }
+}
+
+impl Accumulator for StateAccum {
+    fn evaluate(&mut self) -> datafusion_common::Result<ScalarValue> {
+        let state = self.inner.state()?;
+
+        let array = state
+            .iter()
+            .map(|s| s.to_array())
+            .collect::<Result<Vec<_>, _>>()?;
+        let struct_array = StructArray::try_new(self.state_fields.clone(), array, None)?;
+        Ok(ScalarValue::Struct(Arc::new(struct_array)))
+    }
+
+    fn merge_batch(
+        &mut self,
+        states: &[datatypes::arrow::array::ArrayRef],
+    ) -> datafusion_common::Result<()> {
+        self.inner.merge_batch(states)
+    }
+
+    fn update_batch(
+        &mut self,
+        values: &[datatypes::arrow::array::ArrayRef],
+    ) -> datafusion_common::Result<()> {
+        self.inner.update_batch(values)
+    }
+
+    fn size(&self) -> usize {
+        self.inner.size()
+    }
+
+    fn state(&mut self) -> datafusion_common::Result<Vec<ScalarValue>> {
+        self.inner.state()
+    }
+}
+
+/// TODO(discord9): mark this function as non-ser/de able
+///
+/// This wrapper shouldn't be registered as a UDAF, as it contains extra data that is not serializable
+/// and that changes for different logical plans.
+#[derive(Debug, Clone)]
+pub struct MergeWrapper {
+    inner: AggregateUDF,
+    name: String,
+    merge_signature: Signature,
+    /// The original physical expression of the aggregate function, can't store the original aggregate function directly, as PhysicalExpr didn't implement Any
+    original_phy_expr: Arc<AggregateFunctionExpr>,
+    original_input_types: Vec<DataType>,
+}
+impl MergeWrapper {
+    pub fn new(
+        inner: AggregateUDF,
+        original_phy_expr: Arc<AggregateFunctionExpr>,
+        original_input_types: Vec<DataType>,
+    ) -> datafusion_common::Result<Self> {
+        let name = aggr_merge_func_name(inner.name());
+        // the input type is actually struct type, which is the state fields of the original aggregate function.
+        let merge_signature = Signature::user_defined(datafusion_expr::Volatility::Immutable);
+
+        Ok(Self {
+            inner,
+            name,
+            merge_signature,
+            original_phy_expr,
+            original_input_types,
+        })
+    }
+
+    pub fn inner(&self) -> &AggregateUDF {
+        &self.inner
+    }
+}
+
+impl AggregateUDFImpl for MergeWrapper {
+    fn accumulator<'a, 'b>(
+        &'a self,
+        acc_args: datafusion_expr::function::AccumulatorArgs<'b>,
+    ) -> datafusion_common::Result<Box<dyn Accumulator>> {
+        if acc_args.schema.fields().len() != 1
+            || !matches!(acc_args.schema.field(0).data_type(), DataType::Struct(_))
+        {
+            return Err(datafusion_common::DataFusionError::Internal(format!(
+                "Expected one struct type as input, got: {:?}",
+                acc_args.schema
+            )));
+        }
+        let input_type = acc_args.schema.field(0).data_type();
+        let DataType::Struct(fields) = input_type else {
+            return Err(datafusion_common::DataFusionError::Internal(format!(
+                "Expected a struct type for input, got: {:?}",
+                input_type
+            )));
+        };
+
+        let inner_accum = self.original_phy_expr.create_accumulator()?;
+        Ok(Box::new(MergeAccum::new(inner_accum, fields)))
+    }
+
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+    fn name(&self) -> &str {
+        self.name.as_str()
+    }
+
+    fn is_nullable(&self) -> bool {
+        self.inner.is_nullable()
+    }
+
+    /// Notice here the `arg_types` is actually the `state_fields`'s data types,
+    /// so return fixed return type instead of using `arg_types` to determine the return type.
+    fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
+        // The return type is the same as the original aggregate function's return type.
+        let ret_type = self.inner.return_type(&self.original_input_types)?;
+        Ok(ret_type)
+    }
+    fn signature(&self) -> &Signature {
+        &self.merge_signature
+    }
+
+    /// Coercing types does nothing here, as the optimizer should already be able to produce the struct type
+    fn coerce_types(&self, arg_types: &[DataType]) -> datafusion_common::Result<Vec<DataType>> {
+        // just check if the arg_types are only one and is struct array
+        if arg_types.len() != 1 || !matches!(arg_types.first(), Some(DataType::Struct(_))) {
+            return Err(datafusion_common::DataFusionError::Internal(format!(
+                "Expected one struct type as input, got: {:?}",
+                arg_types
+            )));
+        }
+        Ok(arg_types.to_vec())
+    }
+
+    /// Just return the original aggregate function's state fields.
+    fn state_fields(
+        &self,
+        _args: datafusion_expr::function::StateFieldsArgs,
+    ) -> datafusion_common::Result<Vec<Field>> {
+        self.original_phy_expr.state_fields()
+    }
+}
+
+/// The merge accumulator, which modifies `update_batch`'s behavior to accept one struct array that
+/// includes the state fields of the original aggregate function, and merges said states into the original accumulator.
+/// The output is the same as the original aggregate function's.
+#[derive(Debug)]
+pub struct MergeAccum {
+    inner: Box<dyn Accumulator>,
+    state_fields: Fields,
+}
+
+impl MergeAccum {
+    pub fn new(inner: Box<dyn Accumulator>, state_fields: &Fields) -> Self {
+        Self {
+            inner,
+            state_fields: state_fields.clone(),
+        }
+    }
+}
+
+impl Accumulator for MergeAccum {
+    fn evaluate(&mut self) -> datafusion_common::Result<ScalarValue> {
+        self.inner.evaluate()
+    }
+
+    fn merge_batch(&mut self, states: &[arrow::array::ArrayRef]) -> datafusion_common::Result<()> {
+        self.inner.merge_batch(states)
+    }
+
+    fn update_batch(&mut self, values: &[arrow::array::ArrayRef]) -> datafusion_common::Result<()> {
+        let value = values.first().ok_or_else(|| {
+            datafusion_common::DataFusionError::Internal("No values provided for merge".to_string())
+        })?;
+        // The input values are states from other accumulators, so we merge them.
+        let struct_arr = value
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .ok_or_else(|| {
+                datafusion_common::DataFusionError::Internal(format!(
+                    "Expected StructArray, got: {:?}",
+                    value.data_type()
+                ))
+            })?;
+        let fields = struct_arr.fields();
+        if fields != &self.state_fields {
+            return Err(datafusion_common::DataFusionError::Internal(format!(
+                "Expected state fields: {:?}, got: {:?}",
+                self.state_fields, fields
+            )));
+        }
+
+        // now fields should be the same, so we can merge the batch
+        // by passing the columns directly, as their order should be the same
+        let state_columns = struct_arr.columns();
+        self.inner.merge_batch(state_columns)
+    }
+
+    fn size(&self) -> usize {
+        self.inner.size()
+    }
+
+    fn state(&mut self) -> datafusion_common::Result<Vec<ScalarValue>> {
+        self.inner.state()
+    }
+}
+
+#[cfg(test)]
+mod tests;
--- a/src/common/function/src/aggrs/aggr_wrapper/tests.rs
+++ b/src/common/function/src/aggrs/aggr_wrapper/tests.rs
@@ -0,0 +1,804 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::any::Any;
+use std::pin::Pin;
+use std::sync::{Arc, Mutex};
+use std::task::{Context, Poll};
+
+use arrow::array::{ArrayRef, Float64Array, Int64Array, UInt64Array};
+use arrow::record_batch::RecordBatch;
+use arrow_schema::SchemaRef;
+use datafusion::catalog::{Session, TableProvider};
+use datafusion::datasource::DefaultTableSource;
+use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext};
+use datafusion::functions_aggregate::average::avg_udaf;
+use datafusion::functions_aggregate::sum::sum_udaf;
+use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
+use datafusion::optimizer::AnalyzerRule;
+use datafusion::physical_plan::aggregates::AggregateExec;
+use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
+use datafusion::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties};
+use datafusion::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner};
+use datafusion::prelude::SessionContext;
+use datafusion_common::{Column, TableReference};
+use datafusion_expr::expr::AggregateFunction;
+use datafusion_expr::sqlparser::ast::NullTreatment;
+use datafusion_expr::{Aggregate, Expr, LogicalPlan, SortExpr, TableScan};
+use datafusion_physical_expr::aggregate::AggregateExprBuilder;
+use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
+use datatypes::arrow_array::StringArray;
+use futures::{Stream, StreamExt as _};
+use pretty_assertions::assert_eq;
+
+use super::*;
+use crate::aggrs::approximate::hll::HllState;
+use crate::aggrs::approximate::uddsketch::UddSketchState;
+use crate::aggrs::count_hash::CountHash;
+use crate::function::Function as _;
+use crate::scalars::hll_count::HllCalcFunction;
+use crate::scalars::uddsketch_calc::UddSketchCalcFunction;
+
+#[derive(Debug)]
+pub struct MockInputExec {
+    input: Vec<RecordBatch>,
+    schema: SchemaRef,
+    properties: PlanProperties,
+}
+
+impl MockInputExec {
+    pub fn new(input: Vec<RecordBatch>, schema: SchemaRef) -> Self {
+        Self {
+            properties: PlanProperties::new(
+                EquivalenceProperties::new(schema.clone()),
+                Partitioning::UnknownPartitioning(1),
+                EmissionType::Incremental,
+                Boundedness::Bounded,
+            ),
+            input,
+            schema,
+        }
+    }
+}
+
+impl DisplayAs for MockInputExec {
+    fn fmt_as(&self, _t: DisplayFormatType, _f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        unimplemented!()
+    }
+}
+
+impl ExecutionPlan for MockInputExec {
+    fn name(&self) -> &str {
+        "MockInputExec"
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        _children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> datafusion_common::Result<Arc<dyn ExecutionPlan>> {
+        Ok(self)
+    }
+
+    fn execute(
+        &self,
+        _partition: usize,
+        _context: Arc<TaskContext>,
+    ) -> datafusion_common::Result<SendableRecordBatchStream> {
+        let stream = MockStream {
+            stream: self.input.clone(),
+            schema: self.schema.clone(),
+            idx: 0,
+        };
+        Ok(Box::pin(stream))
+    }
+}
+
+struct MockStream {
+    stream: Vec<RecordBatch>,
+    schema: SchemaRef,
+    idx: usize,
+}
+
+impl Stream for MockStream {
+    type Item = datafusion_common::Result<RecordBatch>;
+    fn poll_next(
+        mut self: Pin<&mut Self>,
+        _cx: &mut Context<'_>,
+    ) -> Poll<Option<datafusion_common::Result<RecordBatch>>> {
+        if self.idx < self.stream.len() {
+            let ret = self.stream[self.idx].clone();
+            self.idx += 1;
+            Poll::Ready(Some(Ok(ret)))
+        } else {
+            Poll::Ready(None)
+        }
+    }
+}
+
+impl RecordBatchStream for MockStream {
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+}
+
+#[derive(Debug)]
+struct DummyTableProvider {
+    schema: Arc<arrow_schema::Schema>,
+    record_batch: Mutex<Option<RecordBatch>>,
+}
+
+impl DummyTableProvider {
+    #[allow(unused)]
+    pub fn new(schema: Arc<arrow_schema::Schema>, record_batch: Option<RecordBatch>) -> Self {
+        Self {
+            schema,
+            record_batch: Mutex::new(record_batch),
+        }
+    }
+}
+
+impl Default for DummyTableProvider {
+    fn default() -> Self {
+        Self {
+            schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
+                "number",
+                DataType::Int64,
+                true,
+            )])),
+            record_batch: Mutex::new(None),
+        }
+    }
+}
+
+#[async_trait::async_trait]
+impl TableProvider for DummyTableProvider {
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    fn schema(&self) -> Arc<arrow_schema::Schema> {
+        self.schema.clone()
+    }
+
+    fn table_type(&self) -> datafusion_expr::TableType {
+        datafusion_expr::TableType::Base
+    }
+
+    async fn scan(
+        &self,
+        _state: &dyn Session,
+        _projection: Option<&Vec<usize>>,
+        _filters: &[Expr],
+        _limit: Option<usize>,
+    ) -> datafusion::error::Result<Arc<dyn ExecutionPlan>> {
+        let input: Vec<RecordBatch> = self
+            .record_batch
+            .lock()
+            .unwrap()
+            .clone()
+            .map(|r| vec![r])
+            .unwrap_or_default();
+        Ok(Arc::new(MockInputExec::new(input, self.schema.clone())))
+    }
+}
+
+fn dummy_table_scan() -> LogicalPlan {
+    let table_provider = Arc::new(DummyTableProvider::default());
+    let table_source = DefaultTableSource::new(table_provider);
+    LogicalPlan::TableScan(
+        TableScan::try_new(
+            TableReference::bare("Number"),
+            Arc::new(table_source),
+            None,
+            vec![],
+            None,
+        )
+        .unwrap(),
+    )
+}
+
+#[tokio::test]
+async fn test_sum_udaf() {
+    let ctx = SessionContext::new();
+
+    let sum = datafusion::functions_aggregate::sum::sum_udaf();
+    let sum = (*sum).clone(); // owned copy: the wrappers below take it by value
+    let original_aggr = Aggregate::try_new(
+        Arc::new(dummy_table_scan()),
+        vec![],
+        vec![Expr::AggregateFunction(AggregateFunction::new_udf(
+            Arc::new(sum.clone()),
+            vec![Expr::Column(Column::new_unqualified("number"))],
+            false,
+            None,
+            None,
+            None,
+        ))],
+    )
+    .unwrap();
+    let res = StateMergeHelper::split_aggr_node(original_aggr).unwrap(); // -> state + merge plans
+
+    let expected_lower_plan = LogicalPlan::Aggregate(
+        Aggregate::try_new(
+            Arc::new(dummy_table_scan()),
+            vec![],
+            vec![Expr::AggregateFunction(AggregateFunction::new_udf(
+                Arc::new(StateWrapper::new(sum.clone()).unwrap().into()),
+                vec![Expr::Column(Column::new_unqualified("number"))],
+                false,
+                None,
+                None,
+                None,
+            ))],
+        )
+        .unwrap(),
+    )
+    .recompute_schema()
+    .unwrap();
+    assert_eq!(res.lower_state.as_ref(), &expected_lower_plan);
+
+    let expected_merge_plan = LogicalPlan::Aggregate(
+        Aggregate::try_new(
+            Arc::new(expected_lower_plan),
+            vec![],
+            vec![Expr::AggregateFunction(AggregateFunction::new_udf(
+                Arc::new(
+                    MergeWrapper::new(
+                        sum.clone(),
+                        Arc::new(
+                            AggregateExprBuilder::new(
+                                Arc::new(sum.clone()),
+                                vec![Arc::new(
+                                    datafusion::physical_expr::expressions::Column::new(
+                                        "number", 0,
+                                    ),
+                                )],
+                            )
+                            .schema(Arc::new(dummy_table_scan().schema().as_arrow().clone()))
+                            .alias("sum(number)")
+                            .build()
+                            .unwrap(),
+                        ),
+                        vec![DataType::Int64],
+                    )
+                    .unwrap()
+                    .into(),
+                ),
+                vec![Expr::Column(Column::new_unqualified("__sum_state(number)"))],
+                false,
+                None,
+                None,
+                None,
+            ))
+            .alias("sum(number)")],
+        )
+        .unwrap(),
+    );
+    assert_eq!(res.upper_merge.as_ref(), &expected_merge_plan);
+
+    let phy_aggr_state_plan = DefaultPhysicalPlanner::default()
+        .create_physical_plan(&res.lower_state, &ctx.state())
+        .await
+        .unwrap();
+    let aggr_exec = phy_aggr_state_plan
+        .as_any()
+        .downcast_ref::<AggregateExec>()
+        .unwrap();
+    let aggr_func_expr = &aggr_exec.aggr_expr()[0];
+    let mut state_accum = aggr_func_expr.create_accumulator().unwrap();
+
+    // feed one batch to the state accumulator; the NULL is skipped, so 1 + 2 + 3 = 6
+    let input = Int64Array::from(vec![Some(1), Some(2), None, Some(3)]);
+    let values = vec![Arc::new(input) as arrow::array::ArrayRef];
+
+    state_accum.update_batch(&values).unwrap();
+    let state = state_accum.state().unwrap();
+    assert_eq!(state.len(), 1); // sum keeps a single state field
+    assert_eq!(state[0], ScalarValue::Int64(Some(6)));
+
+    let eval_res = state_accum.evaluate().unwrap(); // the state packed into a struct
+    assert_eq!(
+        eval_res,
+        ScalarValue::Struct(Arc::new(
+            StructArray::try_new(
+                vec![Field::new("sum[sum]", DataType::Int64, true)].into(),
+                vec![Arc::new(Int64Array::from(vec![Some(6)]))],
+                None,
+            )
+            .unwrap(),
+        ))
+    );
+
+    let phy_aggr_merge_plan = DefaultPhysicalPlanner::default()
+        .create_physical_plan(&res.upper_merge, &ctx.state())
+        .await
+        .unwrap();
+    let aggr_exec = phy_aggr_merge_plan
+        .as_any()
+        .downcast_ref::<AggregateExec>()
+        .unwrap();
+    let aggr_func_expr = &aggr_exec.aggr_expr()[0];
+    let mut merge_accum = aggr_func_expr.create_accumulator().unwrap();
+
+    let merge_input =
+        vec![Arc::new(Int64Array::from(vec![Some(6), Some(42), None])) as arrow::array::ArrayRef];
+    let merge_input_struct_arr = StructArray::try_new(
+        vec![Field::new("sum[sum]", DataType::Int64, true)].into(),
+        merge_input,
+        None,
+    )
+    .unwrap();
+
+    merge_accum
+        .update_batch(&[Arc::new(merge_input_struct_arr)])
+        .unwrap();
+    let merge_state = merge_accum.state().unwrap();
+    assert_eq!(merge_state.len(), 1);
+    assert_eq!(merge_state[0], ScalarValue::Int64(Some(48))); // 6 + 42, NULL skipped
+
+    let merge_eval_res = merge_accum.evaluate().unwrap();
+    assert_eq!(merge_eval_res, ScalarValue::Int64(Some(48))); // plain value, not a struct
+}
+
+#[tokio::test]
+async fn test_avg_udaf() {
+    let ctx = SessionContext::new();
+
+    let avg = datafusion::functions_aggregate::average::avg_udaf();
+    let avg = (*avg).clone(); // owned copy: the wrappers below take it by value
+
+    let original_aggr = Aggregate::try_new(
+        Arc::new(dummy_table_scan()),
+        vec![],
+        vec![Expr::AggregateFunction(AggregateFunction::new_udf(
+            Arc::new(avg.clone()),
+            vec![Expr::Column(Column::new_unqualified("number"))],
+            false,
+            None,
+            None,
+            None,
+        ))],
+    )
+    .unwrap();
+    let res = StateMergeHelper::split_aggr_node(original_aggr).unwrap(); // -> state + merge plans
+
+    let state_func: Arc<AggregateUDF> = Arc::new(StateWrapper::new(avg.clone()).unwrap().into());
+    let expected_aggr_state_plan = LogicalPlan::Aggregate(
+        Aggregate::try_new(
+            Arc::new(dummy_table_scan()),
+            vec![],
+            vec![Expr::AggregateFunction(AggregateFunction::new_udf(
+                state_func,
+                vec![Expr::Column(Column::new_unqualified("number"))],
+                false,
+                None,
+                None,
+                None,
+            ))],
+        )
+        .unwrap(),
+    );
+    // run type coercion on the expected plan so that the avg aggregate function works correctly
+    let coerced_aggr_state_plan = TypeCoercion::new()
+        .analyze(expected_aggr_state_plan.clone(), &Default::default())
+        .unwrap();
+    assert_eq!(res.lower_state.as_ref(), &coerced_aggr_state_plan);
+    assert_eq!(
+        res.lower_state.schema().as_arrow(),
+        &arrow_schema::Schema::new(vec![Field::new(
+            "__avg_state(number)",
+            DataType::Struct(
+                vec![
+                    Field::new("avg[count]", DataType::UInt64, true),
+                    Field::new("avg[sum]", DataType::Float64, true)
+                ]
+                .into()
+            ),
+            true,
+        )])
+    );
+
+    let expected_merge_fn = MergeWrapper::new(
+        avg.clone(),
+        Arc::new(
+            AggregateExprBuilder::new(
+                Arc::new(avg.clone()),
+                vec![Arc::new(
+                    datafusion::physical_expr::expressions::Column::new("number", 0),
+                )],
+            )
+            .schema(Arc::new(dummy_table_scan().schema().as_arrow().clone()))
+            .alias("avg(number)")
+            .build()
+            .unwrap(),
+        ),
+        // argument type after coercion (Float64)
+        vec![DataType::Float64],
+    )
+    .unwrap();
+
+    let expected_merge_plan = LogicalPlan::Aggregate(
+        Aggregate::try_new(
+            Arc::new(coerced_aggr_state_plan.clone()),
+            vec![],
+            vec![Expr::AggregateFunction(AggregateFunction::new_udf(
+                Arc::new(expected_merge_fn.into()),
+                vec![Expr::Column(Column::new_unqualified("__avg_state(number)"))],
+                false,
+                None,
+                None,
+                None,
+            ))
+            .alias("avg(number)")],
+        )
+        .unwrap(),
+    );
+    assert_eq!(res.upper_merge.as_ref(), &expected_merge_plan);
+
+    let phy_aggr_state_plan = DefaultPhysicalPlanner::default()
+        .create_physical_plan(&coerced_aggr_state_plan, &ctx.state())
+        .await
+        .unwrap();
+    let aggr_exec = phy_aggr_state_plan
+        .as_any()
+        .downcast_ref::<AggregateExec>()
+        .unwrap();
+    let aggr_func_expr = &aggr_exec.aggr_expr()[0];
+    let mut state_accum = aggr_func_expr.create_accumulator().unwrap();
+
+    // feed one batch to the state accumulator: 3 non-null values that sum to 6
+    let input = Float64Array::from(vec![Some(1.), Some(2.), None, Some(3.)]);
+    let values = vec![Arc::new(input) as arrow::array::ArrayRef];
+
+    state_accum.update_batch(&values).unwrap();
+    let state = state_accum.state().unwrap();
+    assert_eq!(state.len(), 2); // avg state: count, then sum
+    assert_eq!(state[0], ScalarValue::UInt64(Some(3)));
+    assert_eq!(state[1], ScalarValue::Float64(Some(6.)));
+
+    let eval_res = state_accum.evaluate().unwrap(); // the state packed into a struct
+    let expected = Arc::new(
+        StructArray::try_new(
+            vec![
+                Field::new("avg[count]", DataType::UInt64, true),
+                Field::new("avg[sum]", DataType::Float64, true),
+            ]
+            .into(),
+            vec![
+                Arc::new(UInt64Array::from(vec![Some(3)])),
+                Arc::new(Float64Array::from(vec![Some(6.)])),
+            ],
+            None,
+        )
+        .unwrap(),
+    );
+    assert_eq!(eval_res, ScalarValue::Struct(expected));
+
+    let phy_aggr_merge_plan = DefaultPhysicalPlanner::default()
+        .create_physical_plan(&res.upper_merge, &ctx.state())
+        .await
+        .unwrap();
+    let aggr_exec = phy_aggr_merge_plan
+        .as_any()
+        .downcast_ref::<AggregateExec>()
+        .unwrap();
+    let aggr_func_expr = &aggr_exec.aggr_expr()[0];
+
+    let mut merge_accum = aggr_func_expr.create_accumulator().unwrap();
+
+    let merge_input = vec![
+        Arc::new(UInt64Array::from(vec![Some(3), Some(42), None])) as arrow::array::ArrayRef,
+        Arc::new(Float64Array::from(vec![Some(48.), Some(84.), None])),
+    ];
+    let merge_input_struct_arr = StructArray::try_new(
+        vec![
+            Field::new("avg[count]", DataType::UInt64, true),
+            Field::new("avg[sum]", DataType::Float64, true),
+        ]
+        .into(),
+        merge_input,
+        None,
+    )
+    .unwrap();
+
+    merge_accum
+        .update_batch(&[Arc::new(merge_input_struct_arr)])
+        .unwrap();
+    let merge_state = merge_accum.state().unwrap();
+    assert_eq!(merge_state.len(), 2);
+    assert_eq!(merge_state[0], ScalarValue::UInt64(Some(45))); // 3 + 42
+    assert_eq!(merge_state[1], ScalarValue::Float64(Some(132.))); // 48 + 84
+
+    let merge_eval_res = merge_accum.evaluate().unwrap();
+    // the merge function returns the average: merged sum 132 divided by merged count 45
+    assert_eq!(merge_eval_res, ScalarValue::Float64(Some(132. / 45_f64)));
+}
+
+/// Tests whether the UDAF state fields are correctly implemented,
+/// esp. for the state fields of our own custom UDAFs,
+/// by comparing eval results before and after splitting into state/merge functions.
+#[tokio::test]
+async fn test_udaf_correct_eval_result() {
+    struct TestCase {
+        func: Arc<AggregateUDF>,
+        args: Vec<Expr>,
+        input_schema: SchemaRef,
+        input: Vec<ArrayRef>,
+        expected_output: Option<ScalarValue>, // exact expected value, if any
+        expected_fn: Option<ExpectedFn>, // custom check on the result column, if any
+        distinct: bool,
+        filter: Option<Box<Expr>>,
+        order_by: Option<Vec<SortExpr>>,
+        null_treatment: Option<NullTreatment>,
+    }
+    type ExpectedFn = fn(ArrayRef) -> bool;
+
+    let test_cases = vec![
+        TestCase {
+            func: sum_udaf(),
+            input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
+                "number",
+                DataType::Int64,
+                true,
+            )])),
+            args: vec![Expr::Column(Column::new_unqualified("number"))],
+            input: vec![Arc::new(Int64Array::from(vec![
+                Some(1),
+                Some(2),
+                None,
+                Some(3),
+            ]))],
+            expected_output: Some(ScalarValue::Int64(Some(6))),
+            expected_fn: None,
+            distinct: false,
+            filter: None,
+            order_by: None,
+            null_treatment: None,
+        },
+        TestCase {
+            func: avg_udaf(),
+            input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
+                "number",
+                DataType::Int64,
+                true,
+            )])),
+            args: vec![Expr::Column(Column::new_unqualified("number"))],
+            input: vec![Arc::new(Int64Array::from(vec![
+                Some(1),
+                Some(2),
+                None,
+                Some(3),
+            ]))],
+            expected_output: Some(ScalarValue::Float64(Some(2.0))),
+            expected_fn: None,
+            distinct: false,
+            filter: None,
+            order_by: None,
+            null_treatment: None,
+        },
+        TestCase {
+            func: Arc::new(CountHash::udf_impl()),
+            input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
+                "number",
+                DataType::Int64,
+                true,
+            )])),
+            args: vec![Expr::Column(Column::new_unqualified("number"))],
+            input: vec![Arc::new(Int64Array::from(vec![
+                Some(1),
+                Some(2),
+                None,
+                Some(3),
+                Some(3),
+                Some(3),
+            ]))],
+            expected_output: Some(ScalarValue::Int64(Some(4))),
+            expected_fn: None,
+            distinct: false,
+            filter: None,
+            order_by: None,
+            null_treatment: None,
+        },
+        TestCase {
+            func: Arc::new(UddSketchState::state_udf_impl()),
+            input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
+                "number",
+                DataType::Float64,
+                true,
+            )])),
+            args: vec![
+                Expr::Literal(ScalarValue::Int64(Some(128))),
+                Expr::Literal(ScalarValue::Float64(Some(0.05))),
+                Expr::Column(Column::new_unqualified("number")),
+            ],
+            input: vec![Arc::new(Float64Array::from(vec![
+                Some(1.),
+                Some(2.),
+                None,
+                Some(3.),
+                Some(3.),
+                Some(3.),
+            ]))],
+            expected_output: None,
+            expected_fn: Some(|arr| {
+                let percent = ScalarValue::Float64(Some(0.5)).to_array().unwrap();
+                let percent = datatypes::vectors::Helper::try_into_vector(percent).unwrap();
+                let state = datatypes::vectors::Helper::try_into_vector(arr).unwrap();
+                let udd_calc = UddSketchCalcFunction;
+                let res = udd_calc
+                    .eval(&Default::default(), &[percent, state])
+                    .unwrap();
+                let binding = res.to_arrow_array();
+                let res_arr = binding.as_any().downcast_ref::<Float64Array>().unwrap();
+                assert!(res_arr.len() == 1);
+                assert!((res_arr.value(0) - 2.856578984907706f64).abs() <= f64::EPSILON);
+                true
+            }),
+            distinct: false,
+            filter: None,
+            order_by: None,
+            null_treatment: None,
+        },
+        TestCase {
+            func: Arc::new(HllState::state_udf_impl()),
+            input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
+                "word",
+                DataType::Utf8,
+                true,
+            )])),
+            args: vec![Expr::Column(Column::new_unqualified("word"))],
+            input: vec![Arc::new(StringArray::from(vec![
+                Some("foo"),
+                Some("bar"),
+                None,
+                Some("baz"),
+                Some("baz"),
+            ]))],
+            expected_output: None,
+            expected_fn: Some(|arr| {
+                let state = datatypes::vectors::Helper::try_into_vector(arr).unwrap();
+                let hll_calc = HllCalcFunction;
+                let res = hll_calc.eval(&Default::default(), &[state]).unwrap();
+                let binding = res.to_arrow_array();
+                let res_arr = binding.as_any().downcast_ref::<UInt64Array>().unwrap();
+                assert!(res_arr.len() == 1);
+                assert_eq!(res_arr.value(0), 3); // input has 3 distinct non-null words
+                true
+            }),
+            distinct: false,
+            filter: None,
+            order_by: None,
+            null_treatment: None,
+        },
+        // TODO(discord9): udd_merge/hll_merge/geo_path/quantile_aggr tests
+    ];
+    let test_table_ref = TableReference::bare("TestTable");
+
+    for case in test_cases {
+        let ctx = SessionContext::new();
+        let table_provider = DummyTableProvider::new(
+            case.input_schema.clone(),
+            Some(RecordBatch::try_new(case.input_schema.clone(), case.input.clone()).unwrap()),
+        );
+        let table_source = DefaultTableSource::new(Arc::new(table_provider));
+        let logical_plan = LogicalPlan::TableScan(
+            TableScan::try_new(
+                test_table_ref.clone(),
+                Arc::new(table_source),
+                None,
+                vec![],
+                None,
+            )
+            .unwrap(),
+        );
+
+        let args = case.args;
+
+        let aggr_expr = Expr::AggregateFunction(AggregateFunction::new_udf(
+            case.func.clone(),
+            args,
+            case.distinct,
+            case.filter,
+            case.order_by,
+            case.null_treatment,
+        ));
+
+        let aggr_plan = LogicalPlan::Aggregate(
+            Aggregate::try_new(Arc::new(logical_plan), vec![], vec![aggr_expr]).unwrap(),
+        );
+
+        // make sure aggr_plan is type coerced before it is planned and split
+        let aggr_plan = TypeCoercion::new()
+            .analyze(aggr_plan, &Default::default())
+            .unwrap();
+
+        // first evaluate the original (unsplit) aggregate function
+        let phy_full_aggr_plan = DefaultPhysicalPlanner::default()
+            .create_physical_plan(&aggr_plan, &ctx.state())
+            .await
+            .unwrap();
+
+        {
+            let unsplit_result = execute_phy_plan(&phy_full_aggr_plan).await.unwrap();
+            assert_eq!(unsplit_result.len(), 1);
+            let unsplit_batch = &unsplit_result[0];
+            assert_eq!(unsplit_batch.num_columns(), 1);
+            assert_eq!(unsplit_batch.num_rows(), 1);
+            let unsplit_col = unsplit_batch.column(0);
+            if let Some(expected_output) = &case.expected_output {
+                assert_eq!(unsplit_col.data_type(), &expected_output.data_type());
+                assert_eq!(unsplit_col.len(), 1);
+                assert_eq!(unsplit_col, &expected_output.to_array().unwrap());
+            }
+
+            if let Some(expected_fn) = &case.expected_fn {
+                assert!(expected_fn(unsplit_col.clone()));
+            }
+        }
+        let LogicalPlan::Aggregate(aggr_plan) = aggr_plan else {
+            panic!("Expected Aggregate plan");
+        };
+        let split_plan = StateMergeHelper::split_aggr_node(aggr_plan).unwrap();
+
+        let phy_upper_plan = DefaultPhysicalPlanner::default()
+            .create_physical_plan(&split_plan.upper_merge, &ctx.state())
+            .await
+            .unwrap();
+
+        // the upper plan takes the lower plan as input, so executing it runs the lower plan too,
+        // which should give the same result as the original aggregate function
+        {
+            let split_res = execute_phy_plan(&phy_upper_plan).await.unwrap();
+
+            assert_eq!(split_res.len(), 1);
+            let split_batch = &split_res[0];
+            assert_eq!(split_batch.num_columns(), 1);
+            assert_eq!(split_batch.num_rows(), 1);
+            let split_col = split_batch.column(0);
+            if let Some(expected_output) = &case.expected_output {
+                assert_eq!(split_col.data_type(), &expected_output.data_type());
+                assert_eq!(split_col.len(), 1);
+                assert_eq!(split_col, &expected_output.to_array().unwrap());
+            }
+
+            if let Some(expected_fn) = &case.expected_fn {
+                assert!(expected_fn(split_col.clone()));
+            }
+        }
+    }
+}
+
+async fn execute_phy_plan(
+    phy_plan: &Arc<dyn ExecutionPlan>,
+) -> datafusion_common::Result<Vec<RecordBatch>> {
+    let task_ctx = Arc::new(TaskContext::default());
+    let mut stream = phy_plan.execute(0, task_ctx)?; // only partition 0 is executed
+    let mut batches = Vec::new();
+    while let Some(batch) = stream.next().await {
+        batches.push(batch?); // the first failed batch aborts the collection
+    }
+    Ok(batches)
+}
--- a/src/common/function/src/aggrs/approximate.rs
+++ b/src/common/function/src/aggrs/approximate.rs
@@ -14,8 +14,8 @@

 use crate::function_registry::FunctionRegistry;

-pub(crate) mod hll;
-mod uddsketch;
+pub mod hll;
+pub mod uddsketch;

 pub(crate) struct ApproximateFunction;

--- a/Show More
+++ b/Show More