poc/create-alter-for-metrics:

### Commit Message Enhance Prometheus Bulk Write Handling - **`server.rs`**: Introduced `start_background_task` in `PromBulkState` to handle asynchronous batch processing and SST file writing. Added a new `tx` field to manage task communication. - **`access_layer.rs`**: Added `file_id` method to `ParquetWriter` for file identification. - **`batch_builder.rs`**: Modified `MetricsBatchBuilder` to utilize session catalog and schema, and updated batch processing logic to handle column metadata. - **`prom_store.rs`**: Updated `remote_write` to use `decode_remote_write_request_to_batch` for batch processing and send data to the background task. - **`prom_row_builder.rs`**: Made `TableBuilder` and `TablesBuilder` fields public for external access. - **`proto.rs`**: Exposed `table_data` in `PromWriteRequest` for batch processing. Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>
2025-12-25 23:49:58 +00:00 · 2025-06-30 08:27:26 +00:00 · 2025-06-29 14:07:57 +00:00 · 2025-06-28 09:58:33 +00:00 · 2025-06-27 08:50:25 +00:00 · 2025-06-26 13:00:54 +00:00
589 changed files with 32667 additions and 10055 deletions
--- a/.github/actions/release-cn-artifacts/action.yaml
+++ b/.github/actions/release-cn-artifacts/action.yaml
@@ -64,11 +64,11 @@ inputs:
  upload-max-retry-times:
    description: Max retry times for uploading artifacts to S3
    required: false
-    default: "20"
+    default: "30"
  upload-retry-timeout:
    description: Timeout for uploading artifacts to S3
    required: false
-    default: "30" # minutes
+    default: "120" # minutes
 runs:
  using: composite
  steps:
--- a/.github/actions/setup-greptimedb-cluster/action.yml
+++ b/.github/actions/setup-greptimedb-cluster/action.yml
@@ -10,13 +10,13 @@ inputs:
  meta-replicas:
    default: 2
    description: "Number of Metasrv replicas"
-  image-registry: 
+  image-registry:
    default: "docker.io"
    description: "Image registry"
-  image-repository: 
+  image-repository:
    default: "greptime/greptimedb"
    description: "Image repository"
-  image-tag: 
+  image-tag:
    default: "latest"
    description: 'Image tag'
  etcd-endpoints:
@@ -32,12 +32,12 @@ runs:
  steps:
  - name: Install GreptimeDB operator
    uses: nick-fields/retry@v3
-    with: 
+    with:
      timeout_minutes: 3
      max_attempts: 3
      shell: bash
      command: |
-        helm repo add greptime https://greptimeteam.github.io/helm-charts/ 
+        helm repo add greptime https://greptimeteam.github.io/helm-charts/
        helm repo update
        helm upgrade \
          --install \
@@ -48,10 +48,10 @@ runs:
          --wait-for-jobs
  - name: Install GreptimeDB cluster
    shell: bash
-    run: | 
+    run: |
      helm upgrade \
        --install my-greptimedb \
-        --set meta.etcdEndpoints=${{ inputs.etcd-endpoints }} \
+        --set meta.backendStorage.etcd.endpoints=${{ inputs.etcd-endpoints }} \
        --set meta.enableRegionFailover=${{ inputs.enable-region-failover }} \
        --set image.registry=${{ inputs.image-registry }} \
        --set image.repository=${{ inputs.image-repository }}  \
@@ -59,7 +59,7 @@ runs:
        --set base.podTemplate.main.resources.requests.cpu=50m \
        --set base.podTemplate.main.resources.requests.memory=256Mi \
        --set base.podTemplate.main.resources.limits.cpu=2000m \
-        --set base.podTemplate.main.resources.limits.memory=2Gi \
+        --set base.podTemplate.main.resources.limits.memory=3Gi \
        --set frontend.replicas=${{ inputs.frontend-replicas }} \
        --set datanode.replicas=${{ inputs.datanode-replicas }} \
        --set meta.replicas=${{ inputs.meta-replicas }} \
@@ -72,7 +72,7 @@ runs:
  - name: Wait for GreptimeDB
    shell: bash
    run: |
-      while true; do 
+      while true; do
        PHASE=$(kubectl -n my-greptimedb get gtc my-greptimedb -o jsonpath='{.status.clusterPhase}')
        if [ "$PHASE" == "Running" ]; then
          echo "Cluster is ready"
@@ -86,10 +86,10 @@ runs:
  - name: Print GreptimeDB info
    if: always()
    shell: bash
-    run: | 
+    run: |
      kubectl get all --show-labels -n my-greptimedb
  - name: Describe Nodes
    if: always()
    shell: bash
-    run: | 
+    run: |
      kubectl describe nodes
--- a/.github/labeler.yaml
+++ b/.github/labeler.yaml
@@ -0,0 +1,15 @@
+ci:
+  - changed-files:
+      - any-glob-to-any-file: .github/**
+
+docker:
+  - changed-files:
+      - any-glob-to-any-file: docker/**
+
+documentation:
+  - changed-files:
+      - any-glob-to-any-file: docs/**
+
+dashboard:
+  - changed-files:
+      - any-glob-to-any-file: grafana/**
--- a/.github/scripts/deploy-greptimedb.sh
+++ b/.github/scripts/deploy-greptimedb.sh
@@ -68,7 +68,7 @@ function deploy_greptimedb_cluster() {

  helm install "$cluster_name" greptime/greptimedb-cluster \
    --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
-    --set meta.etcdEndpoints="etcd.$install_namespace:2379" \
+    --set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
    -n "$install_namespace"

  # Wait for greptimedb cluster to be ready.
@@ -103,7 +103,7 @@ function deploy_greptimedb_cluster_with_s3_storage() {

  helm install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
    --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
-    --set meta.etcdEndpoints="etcd.$install_namespace:2379" \
+    --set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
    --set storage.s3.bucket="$AWS_CI_TEST_BUCKET" \
    --set storage.s3.region="$AWS_REGION" \
    --set storage.s3.root="$DATA_ROOT" \
--- a/.github/scripts/update-helm-charts-version.sh
+++ b/.github/scripts/update-helm-charts-version.sh
@@ -30,7 +30,7 @@ update_helm_charts_version() {

  # Commit the changes.
  git add .
-  git commit -m "chore: Update GreptimeDB version to ${VERSION}"
+  git commit -s -m "chore: Update GreptimeDB version to ${VERSION}"
  git push origin $BRANCH_NAME

  # Create a Pull Request.
--- a/.github/scripts/update-homebrew-greptme-version.sh
+++ b/.github/scripts/update-homebrew-greptme-version.sh
@@ -26,7 +26,7 @@ update_homebrew_greptime_version() {

  # Commit the changes.
  git add .
-  git commit -m "chore: Update GreptimeDB version to ${VERSION}"
+  git commit -s -m "chore: Update GreptimeDB version to ${VERSION}"
  git push origin $BRANCH_NAME

  # Create a Pull Request.
--- a/.github/workflows/dev-build.yml
+++ b/.github/workflows/dev-build.yml
@@ -55,6 +55,11 @@ on:
        description: Build and push images to DockerHub and ACR
        required: false
        default: true
+      upload_artifacts_to_s3:
+        type: boolean
+        description: Whether to upload artifacts to S3
+        required: false
+        default: false
      cargo_profile:
        type: choice
        description: The cargo profile to use in building GreptimeDB.
@@ -238,7 +243,7 @@ jobs:
          version: ${{ needs.allocate-runners.outputs.version }}
          push-latest-tag: false # Don't push the latest tag to registry.
          dev-mode: true # Only build the standard images.
-          
+
      - name: Echo Docker image tag to step summary
        run: |
          echo "## Docker Image Tag" >> $GITHUB_STEP_SUMMARY
@@ -281,7 +286,7 @@ jobs:
          aws-cn-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
          aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
          aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
-          upload-to-s3: false
+          upload-to-s3: ${{ inputs.upload_artifacts_to_s3 }}
          dev-mode: true                     # Only build the standard images(exclude centos images).
          push-latest-tag: false             # Don't push the latest tag to registry.
          update-version-info: false         # Don't update the version info in S3.
--- a/.github/workflows/develop.yml
+++ b/.github/workflows/develop.yml
@@ -250,6 +250,11 @@ jobs:
          name: unstable-fuzz-logs
          path: /tmp/unstable-greptime/
          retention-days: 3
+      - name: Describe pods
+        if: failure()
+        shell: bash
+        run: |
+          kubectl describe pod -n my-greptimedb

  build-greptime-ci:
    if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
@@ -405,6 +410,11 @@ jobs:
        shell: bash
        run: |
          kubectl describe nodes
+      - name: Describe pods
+        if: failure()
+        shell: bash
+        run: |
+          kubectl describe pod -n my-greptimedb
      - name: Export kind logs
        if: failure()
        shell: bash
@@ -554,6 +564,11 @@ jobs:
        shell: bash
        run: |
          kubectl describe nodes
+      - name: Describe pods
+        if: failure()
+        shell: bash
+        run: |
+          kubectl describe pod -n my-greptimedb
      - name: Export kind logs
        if: failure()
        shell: bash
--- a/.github/workflows/pr-labeling.yaml
+++ b/.github/workflows/pr-labeling.yaml
@@ -0,0 +1,42 @@
+name: 'PR Labeling'
+
+on:
+  pull_request_target:
+    types:
+      - opened
+      - synchronize
+      - reopened
+
+permissions:
+  contents: read
+  pull-requests: write
+  issues: write
+
+jobs:
+  labeler:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout sources
+        uses: actions/checkout@v4
+
+      - uses: actions/labeler@v5
+        with:
+          configuration-path: ".github/labeler.yaml"
+          repo-token: "${{ secrets.GITHUB_TOKEN }}"
+
+  size-label:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: pascalgn/size-label-action@v0.5.5
+        env:
+          GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
+        with:
+          sizes: >
+            {
+              "0": "XS",
+              "100": "S",
+              "300": "M",
+              "1000": "L",
+              "1500": "XL",
+              "2000": "XXL"
+            }
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -441,8 +441,8 @@ jobs:
          aws-region: ${{ vars.EC2_RUNNER_REGION }}
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}

-  bump-doc-version:
-    name: Bump doc version
+  bump-downstream-repo-versions:
+    name: Bump downstream repo versions
    if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }}
    needs: [allocate-runners, publish-github-release]
    runs-on: ubuntu-latest
@@ -456,36 +456,16 @@ jobs:
          fetch-depth: 0
          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
-      - name: Bump doc version
+      - name: Bump downstream repo versions
        working-directory: cyborg
-        run: pnpm tsx bin/bump-doc-version.ts
-        env:
-          VERSION: ${{ needs.allocate-runners.outputs.version }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          DOCS_REPO_TOKEN: ${{ secrets.DOCS_REPO_TOKEN }}
-
-  bump-website-version:
-    name: Bump website version
-    if: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
-    needs: [allocate-runners, publish-github-release]
-    runs-on: ubuntu-latest
-    # Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
-    permissions:
-      issues: write # Allows the action to create issues for cyborg.
-      contents: write # Allows the action to create a release.
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          persist-credentials: false
-      - uses: ./.github/actions/setup-cyborg
-      - name: Bump website version
-        working-directory: cyborg
-        run: pnpm tsx bin/bump-website-version.ts
+        run: pnpm tsx bin/bump-versions.ts
        env:
+          TARGET_REPOS: website,docs,demo
          VERSION: ${{ needs.allocate-runners.outputs.version }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          WEBSITE_REPO_TOKEN: ${{ secrets.WEBSITE_REPO_TOKEN }}
+          DOCS_REPO_TOKEN: ${{ secrets.DOCS_REPO_TOKEN }}
+          DEMO_REPO_TOKEN: ${{ secrets.DEMO_REPO_TOKEN }}

  bump-helm-charts-version:
    name: Bump helm charts version
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,6 +30,7 @@ members = [
    "src/common/recordbatch",
    "src/common/runtime",
    "src/common/session",
+    "src/common/stat",
    "src/common/substrait",
    "src/common/telemetry",
    "src/common/test-util",
@@ -48,6 +49,7 @@ members = [
    "src/meta-client",
    "src/meta-srv",
    "src/metric-engine",
+    "src/mito-codec",
    "src/mito2",
    "src/object-store",
    "src/operator",
@@ -115,15 +117,16 @@ clap = { version = "4.4", features = ["derive"] }
 config = "0.13.0"
 crossbeam-utils = "0.8"
 dashmap = "6.1"
-datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
-datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
-datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
-datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
-datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
-datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
-datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
-datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
-datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
+datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
+datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
+datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
+datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
+datafusion-functions-aggregate-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
+datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
+datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
+datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
+datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
+datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
 deadpool = "0.12"
 deadpool-postgres = "0.14"
 derive_builder = "0.20"
@@ -132,7 +135,7 @@ etcd-client = "0.14"
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "442348b2518c0bf187fb1ad011ba370c38b96cc4" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "464226cf8a4a22696503536a123d0b9e318582f4" }
 hex = "0.4"
 http = "1"
 humantime = "2.1"
@@ -148,6 +151,7 @@ meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev =
 mockall = "0.13"
 moka = "0.12"
 nalgebra = "0.33"
+nix = { version = "0.30.1", default-features = false, features = ["event", "fs", "process"] }
 notify = "8.0"
 num_cpus = "1.16"
 object_store_opendal = "0.50"
@@ -272,6 +276,7 @@ log-store = { path = "src/log-store" }
 meta-client = { path = "src/meta-client" }
 meta-srv = { path = "src/meta-srv" }
 metric-engine = { path = "src/metric-engine" }
+mito-codec = { path = "src/mito-codec" }
 mito2 = { path = "src/mito2" }
 object-store = { path = "src/object-store" }
 operator = { path = "src/operator" }
@@ -287,6 +292,7 @@ query = { path = "src/query" }
 servers = { path = "src/servers" }
 session = { path = "src/session" }
 sql = { path = "src/sql" }
+stat = { path = "src/common/stat" }
 store-api = { path = "src/store-api" }
 substrait = { path = "src/common/substrait" }
 table = { path = "src/table" }
--- a/README.md
+++ b/README.md
@@ -189,7 +189,8 @@ We invite you to engage and contribute!
 - [Official Website](https://greptime.com/)
 - [Blog](https://greptime.com/blogs/)
 - [LinkedIn](https://www.linkedin.com/company/greptime/)
- [Twitter](https://twitter.com/greptime)
+- [X (Twitter)](https://X.com/greptime)
+- [YouTube](https://www.youtube.com/@greptime)

 ## License

--- a/config/config.md
+++ b/config/config.md
@@ -100,7 +100,7 @@
 | `query` | -- | -- | The query engine options. |
 | `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
 | `storage` | -- | -- | The data storage options. |
-| `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
+| `storage.data_home` | String | `./greptimedb_data` | The working home directory. |
 | `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
 | `storage.cache_path` | String | Unset | Read cache configuration for object storage such as 'S3' etc, it's configured by default when using object storage. It is recommended to configure it when using object storage for better performance.<br/>A local file directory, defaults to `{data_home}`. An empty string means disabling. |
 | `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. |
@@ -123,6 +123,7 @@
 | `storage.http_client.connect_timeout` | String | `30s` | The timeout for only the connect phase of a http client. |
 | `storage.http_client.timeout` | String | `30s` | The total request timeout, applied from when the request starts connecting until the response body has finished.<br/>Also considered a total deadline. |
 | `storage.http_client.pool_idle_timeout` | String | `90s` | The timeout for idle sockets being kept-alive. |
+| `storage.http_client.skip_ssl_validation` | Bool | `false` | Whether to skip SSL certificate verification.<br/>**Security Notice**: Setting `skip_ssl_validation = true` disables certificate verification, making connections vulnerable to man-in-the-middle attacks. Only use this in development or trusted private networks. |
 | `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
 | `region_engine.mito` | -- | -- | The Mito engine options. |
 | `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
@@ -184,10 +185,11 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
+| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `slow_query` | -- | -- | The slow query log options. |
@@ -195,13 +197,13 @@
 | `slow_query.record_type` | String | Unset | The record type of slow queries. It can be `system_table` or `log`. |
 | `slow_query.threshold` | String | Unset | The threshold of slow query. |
 | `slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
-| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
+| `export_metrics` | -- | -- | The standalone instance can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb`) via the remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
 | `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
 | `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
 | `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommended to collect metrics generated by itself<br/>You must create the database before enabling it. |
 | `export_metrics.self_import.db` | String | Unset | -- |
 | `export_metrics.remote_write` | -- | -- | -- |
-| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`. |
+| `export_metrics.remote_write.url` | String | `""` | The Prometheus remote-write endpoint that the metrics are sent to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`. |
 | `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
 | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
 | `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
@@ -232,6 +234,7 @@
 | `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
 | `grpc.server_addr` | String | `127.0.0.1:4001` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
 | `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
+| `grpc.flight_compression` | String | `arrow_ipc` | Compression mode for frontend side Arrow IPC service. Available options:<br/>- `none`: disable all compression<br/>- `transport`: only enable gRPC transport compression (zstd)<br/>- `arrow_ipc`: only enable Arrow IPC compression (lz4)<br/>- `all`: enable all compression.<br/>Defaults to `arrow_ipc`. |
 | `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
 | `grpc.tls.mode` | String | `disable` | TLS mode. |
 | `grpc.tls.cert_path` | String | Unset | Certificate file path. |
@@ -286,10 +289,11 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
+| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `slow_query` | -- | -- | The slow query log options. |
@@ -298,13 +302,11 @@
 | `slow_query.threshold` | String | `30s` | The threshold of slow query. It can be human readable time string, for example: `10s`, `100ms`, `1s`. |
 | `slow_query.sample_ratio` | Float | `1.0` | The sampling ratio of slow query log. The value should be in the range of (0, 1]. For example, `0.1` means 10% of the slow queries will be logged and `1.0` means all slow queries will be logged. |
 | `slow_query.ttl` | String | `30d` | The TTL of the `slow_queries` system table. Default is `30d` when `record_type` is `system_table`. |
-| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
+| `export_metrics` | -- | -- | The frontend can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
 | `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
 | `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
-| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself<br/>You must create the database before enabling it. |
-| `export_metrics.self_import.db` | String | Unset | -- |
 | `export_metrics.remote_write` | -- | -- | -- |
-| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`. |
+| `export_metrics.remote_write.url` | String | `""` | The Prometheus remote-write endpoint that the metrics are sent to. An example URL: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`. |
 | `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
 | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
 | `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
@@ -314,23 +316,28 @@

 | Key | Type | Default | Descriptions |
 | --- | -----| ------- | ----------- |
-| `data_home` | String | `./greptimedb_data/metasrv/` | The working home directory. |
-| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
-| `server_addr` | String | `127.0.0.1:3002` | The communication server address for the frontend and datanode to connect to metasrv.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `bind_addr`. |
+| `data_home` | String | `./greptimedb_data` | The working home directory. |
 | `store_addrs` | Array | -- | Store server address default to etcd store.<br/>For postgres store, the format is:<br/>"password=password dbname=postgres user=postgres host=localhost port=5432"<br/>For etcd store, the format is:<br/>"127.0.0.1:2379" |
 | `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
-| `backend` | String | `etcd_store` | The datastore for meta server.<br/>Available values:<br/>- `etcd_store` (default value)<br/>- `memory_store`<br/>- `postgres_store` |
+| `backend` | String | `etcd_store` | The datastore for meta server.<br/>Available values:<br/>- `etcd_store` (default value)<br/>- `memory_store`<br/>- `postgres_store`<br/>- `mysql_store` |
 | `meta_table_name` | String | `greptime_metakv` | Table name in RDS to store metadata. Effect when using a RDS kvbackend.<br/>**Only used when backend is `postgres_store`.** |
 | `meta_election_lock_id` | Integer | `1` | Advisory lock id in PostgreSQL for election. Effect when using PostgreSQL as kvbackend<br/>Only used when backend is `postgres_store`. |
 | `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
 | `use_memory_store` | Bool | `false` | Store data in memory. |
 | `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
+| `region_failure_detector_initialization_delay` | String | `10m` | Delay before initializing region failure detectors.<br/>This delay helps prevent premature initialization of region failure detectors in cases where<br/>cluster maintenance mode is enabled right after metasrv starts, especially when the cluster<br/>is not deployed via the recommended GreptimeDB Operator. Without this delay, early detector registration<br/>may trigger unnecessary region failovers during datanode startup. |
 | `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.<br/>**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
 | `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
 | `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
 | `runtime` | -- | -- | The runtime options. |
 | `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
 | `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
+| `grpc` | -- | -- | The gRPC server options. |
+| `grpc.bind_addr` | String | `127.0.0.1:3002` | The address to bind the gRPC server. |
+| `grpc.server_addr` | String | `127.0.0.1:3002` | The communication server address for the frontend and datanode to connect to metasrv.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
+| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
+| `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
+| `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
 | `http` | -- | -- | The HTTP server options. |
 | `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
 | `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
@@ -366,19 +373,18 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
+| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
-| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
+| `export_metrics` | -- | -- | The metasrv can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
 | `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
 | `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
-| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself<br/>You must create the database before enabling it. |
-| `export_metrics.self_import.db` | String | Unset | -- |
 | `export_metrics.remote_write` | -- | -- | -- |
-| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`. |
+| `export_metrics.remote_write.url` | String | `""` | The Prometheus remote write endpoint that the metrics are sent to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`. |
 | `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
 | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
 | `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
@@ -404,6 +410,7 @@
 | `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
 | `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
 | `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
+| `grpc.flight_compression` | String | `arrow_ipc` | Compression mode for datanode side Arrow IPC service. Available options:<br/>- `none`: disable all compression<br/>- `transport`: only enable gRPC transport compression (zstd)<br/>- `arrow_ipc`: only enable Arrow IPC compression (lz4)<br/>- `all`: enable all compression.<br/>Default to `arrow_ipc` |
 | `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
 | `grpc.tls.mode` | String | `disable` | TLS mode. |
 | `grpc.tls.cert_path` | String | Unset | Certificate file path. |
@@ -446,7 +453,7 @@
 | `query` | -- | -- | The query engine options. |
 | `query.parallelism` | Integer | `0` | Parallelism of the query engine.<br/>Default to 0, which means the number of CPU cores. |
 | `storage` | -- | -- | The data storage options. |
-| `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
+| `storage.data_home` | String | `./greptimedb_data` | The working home directory. |
 | `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
 | `storage.cache_path` | String | Unset | Read cache configuration for object storage such as 'S3' etc, it's configured by default when using object storage. It is recommended to configure it when using object storage for better performance.<br/>A local file directory, defaults to `{data_home}`. An empty string means disabling. |
 | `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. |
@@ -469,6 +476,7 @@
 | `storage.http_client.connect_timeout` | String | `30s` | The timeout for only the connect phase of a http client. |
 | `storage.http_client.timeout` | String | `30s` | The total request timeout, applied from when the request starts connecting until the response body has finished.<br/>Also considered a total deadline. |
 | `storage.http_client.pool_idle_timeout` | String | `90s` | The timeout for idle sockets being kept-alive. |
+| `storage.http_client.skip_ssl_validation` | Bool | `false` | Whether to skip SSL certificate verification.<br/>**Security Notice**: Setting `skip_ssl_validation = true` disables certificate verification, making connections vulnerable to man-in-the-middle attacks. Only use this in development or trusted private networks. |
 | `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
 | `region_engine.mito` | -- | -- | The Mito engine options. |
 | `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
@@ -530,19 +538,18 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
+| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
-| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
+| `export_metrics` | -- | -- | The datanode can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
 | `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
 | `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
-| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself<br/>You must create the database before enabling it. |
-| `export_metrics.self_import.db` | String | Unset | -- |
 | `export_metrics.remote_write` | -- | -- | -- |
-| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`. |
+| `export_metrics.remote_write.url` | String | `""` | The Prometheus remote write endpoint that the metrics are sent to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`. |
 | `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
 | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
 | `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
@@ -582,10 +589,11 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
+| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -44,6 +44,13 @@ runtime_size = 8
 max_recv_message_size = "512MB"
 ## The maximum send message size for gRPC server.
 max_send_message_size = "512MB"
+## Compression mode for datanode side Arrow IPC service. Available options:
+## - `none`: disable all compression
+## - `transport`: only enable gRPC transport compression (zstd)
+## - `arrow_ipc`: only enable Arrow IPC compression (lz4)
+## - `all`: enable all compression.
+## Default to `arrow_ipc`
+flight_compression = "arrow_ipc"

 ## gRPC server TLS options, see `mysql.tls` section.
 [grpc.tls]
@@ -252,7 +259,7 @@ parallelism = 0
 ## The data storage options.
 [storage]
 ## The working home directory.
-data_home = "./greptimedb_data/"
+data_home = "./greptimedb_data"

 ## The storage type used to store the data.
 ## - `File`: the data is stored in the local file system.
@@ -360,6 +367,10 @@ timeout = "30s"
 ## The timeout for idle sockets being kept-alive.
 pool_idle_timeout = "90s"

+## Whether to skip SSL certificate verification.
+## **Security Notice**: Setting `skip_ssl_validation = true` disables certificate verification, making connections vulnerable to man-in-the-middle attacks. Only use this in development or trusted private networks.
+skip_ssl_validation = false
+
 # Custom storage options
 # [[storage.providers]]
 # name = "S3"
@@ -618,7 +629,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4317"
+otlp_endpoint = "http://localhost:4318"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -629,30 +640,25 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

+## The OTLP tracing export protocol. Can be `grpc`/`http`.
+otlp_export_protocol = "http"
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
 [logging.tracing_sample_ratio]
 default_ratio = 1.0

-## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
+## The datanode can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.
 ## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
 [export_metrics]
-
 ## whether enable export metrics.
 enable = false
-
 ## The interval of export metrics.
 write_interval = "30s"

-## For `standalone` mode, `self_import` is recommend to collect metrics generated by itself
-## You must create the database before enabling it.
-[export_metrics.self_import]
-## @toml2docs:none-default
-db = "greptime_metrics"
-
 [export_metrics.remote_write]
-## The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`.
+## The Prometheus remote write endpoint that the metrics are sent to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`.
 url = ""

 ## HTTP headers of Prometheus remote-write carry.
--- a/config/flownode.example.toml
+++ b/config/flownode.example.toml
@@ -83,7 +83,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4317"
+otlp_endpoint = "http://localhost:4318"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -94,6 +94,9 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

+## The OTLP tracing export protocol. Can be `grpc`/`http`.
+otlp_export_protocol = "http"
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
--- a/config/frontend.example.toml
+++ b/config/frontend.example.toml
@@ -54,6 +54,13 @@ bind_addr = "127.0.0.1:4001"
 server_addr = "127.0.0.1:4001"
 ## The number of server worker threads.
 runtime_size = 8
+## Compression mode for frontend side Arrow IPC service. Available options:
+## - `none`: disable all compression
+## - `transport`: only enable gRPC transport compression (zstd)
+## - `arrow_ipc`: only enable Arrow IPC compression (lz4)
+## - `all`: enable all compression.
+## Default to `arrow_ipc`
+flight_compression = "arrow_ipc"

 ## gRPC server TLS options, see `mysql.tls` section.
 [grpc.tls]
@@ -211,7 +218,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4317"
+otlp_endpoint = "http://localhost:4318"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -222,6 +229,9 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

+## The OTLP tracing export protocol. Can be `grpc`/`http`.
+otlp_export_protocol = "http"
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
@@ -247,24 +257,16 @@ sample_ratio = 1.0
 ## The TTL of the `slow_queries` system table. Default is `30d` when `record_type` is `system_table`.
 ttl = "30d"

-## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
+## The frontend can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.
 ## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
 [export_metrics]
-
 ## whether enable export metrics.
 enable = false
-
 ## The interval of export metrics.
 write_interval = "30s"

-## For `standalone` mode, `self_import` is recommend to collect metrics generated by itself
-## You must create the database before enabling it.
-[export_metrics.self_import]
-## @toml2docs:none-default
-db = "greptime_metrics"
-
 [export_metrics.remote_write]
-## The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`.
+## The Prometheus remote write endpoint that the metrics are sent to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`.
 url = ""

 ## HTTP headers of Prometheus remote-write carry.
--- a/config/metasrv.example.toml
+++ b/config/metasrv.example.toml
@@ -1,13 +1,5 @@
 ## The working home directory.
-data_home = "./greptimedb_data/metasrv/"
-
-## The bind address of metasrv.
-bind_addr = "127.0.0.1:3002"
-
-## The communication server address for the frontend and datanode to connect to metasrv.
-## If left empty or unset, the server will automatically use the IP address of the first network interface
-## on the host, with the same port number as the one specified in `bind_addr`.
-server_addr = "127.0.0.1:3002"
+data_home = "./greptimedb_data"

 ## Store server address default to etcd store.
 ## For postgres store, the format is:
@@ -24,6 +16,7 @@ store_key_prefix = ""
 ## - `etcd_store` (default value)
 ## - `memory_store`
 ## - `postgres_store`
+## - `mysql_store`
 backend = "etcd_store"

 ## Table name in RDS to store metadata. Effect when using a RDS kvbackend.
@@ -50,6 +43,13 @@ use_memory_store = false
 ## - Using shared storage (e.g., s3).
 enable_region_failover = false

+## Delay before initializing region failure detectors.
+## This delay helps prevent premature initialization of region failure detectors in cases where
+## cluster maintenance mode is enabled right after metasrv starts, especially when the cluster
+## is not deployed via the recommended GreptimeDB Operator. Without this delay, early detector registration
+## may trigger unnecessary region failovers during datanode startup.
+region_failure_detector_initialization_delay = '10m'
+
 ## Whether to allow region failover on local WAL.
 ## **This option is not recommended to be set to true, because it may lead to data loss during failover.**
 allow_region_failover_on_local_wal = false
@@ -67,6 +67,21 @@ node_max_idle_time = "24hours"
 ## The number of threads to execute the runtime for global write operations.
 #+ compact_rt_size = 4

+## The gRPC server options.
+[grpc]
+## The address to bind the gRPC server.
+bind_addr = "127.0.0.1:3002"
+## The communication server address for the frontend and datanode to connect to metasrv.
+## If left empty or unset, the server will automatically use the IP address of the first network interface
+## on the host, with the same port number as the one specified in `bind_addr`.
+server_addr = "127.0.0.1:3002"
+## The number of server worker threads.
+runtime_size = 8
+## The maximum receive message size for gRPC server.
+max_recv_message_size = "512MB"
+## The maximum send message size for gRPC server.
+max_send_message_size = "512MB"
+
 ## The HTTP server options.
 [http]
 ## The address to bind the HTTP server.
@@ -212,7 +227,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4317"
+otlp_endpoint = "http://localhost:4318"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -223,30 +238,25 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

+## The OTLP tracing export protocol. Can be `grpc`/`http`.
+otlp_export_protocol = "http"
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
 [logging.tracing_sample_ratio]
 default_ratio = 1.0

-## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
+## The metasrv can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.
 ## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
 [export_metrics]
-
 ## whether enable export metrics.
 enable = false
-
 ## The interval of export metrics.
 write_interval = "30s"

-## For `standalone` mode, `self_import` is recommend to collect metrics generated by itself
-## You must create the database before enabling it.
-[export_metrics.self_import]
-## @toml2docs:none-default
-db = "greptime_metrics"
-
 [export_metrics.remote_write]
-## The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`.
+## The Prometheus remote write endpoint that the metrics are sent to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`.
 url = ""

 ## HTTP headers of Prometheus remote-write carry.
--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -350,7 +350,7 @@ parallelism = 0
 ## The data storage options.
 [storage]
 ## The working home directory.
-data_home = "./greptimedb_data/"
+data_home = "./greptimedb_data"

 ## The storage type used to store the data.
 ## - `File`: the data is stored in the local file system.
@@ -458,6 +458,10 @@ timeout = "30s"
 ## The timeout for idle sockets being kept-alive.
 pool_idle_timeout = "90s"

+## Whether to skip SSL certificate verification.
+## **Security Notice**: Setting `skip_ssl_validation = true` disables certificate verification, making connections vulnerable to man-in-the-middle attacks. Only use this in development or trusted private networks.
+skip_ssl_validation = false
+
 # Custom storage options
 # [[storage.providers]]
 # name = "S3"
@@ -716,7 +720,7 @@ level = "info"
 enable_otlp_tracing = false

 ## The OTLP tracing endpoint.
-otlp_endpoint = "http://localhost:4317"
+otlp_endpoint = "http://localhost:4318"

 ## Whether to append logs to stdout.
 append_stdout = true
@@ -727,6 +731,9 @@ log_format = "text"
 ## The maximum amount of log files.
 max_log_files = 720

+## The OTLP tracing export protocol. Can be `grpc`/`http`.
+otlp_export_protocol = "http"
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
@@ -750,13 +757,11 @@ default_ratio = 1.0
 ## @toml2docs:none-default
 #+ sample_ratio = 1.0

-## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
+## The standalone instance can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb`) via the remote-write API.
 ## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
 [export_metrics]
-
 ## whether enable export metrics.
 enable = false
-
 ## The interval of export metrics.
 write_interval = "30s"

@@ -767,7 +772,7 @@ write_interval = "30s"
 db = "greptime_metrics"

 [export_metrics.remote_write]
-## The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`.
+## The Prometheus remote write endpoint that the metrics are sent to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`.
 url = ""

 ## HTTP headers of Prometheus remote-write carry.
--- a/cyborg/bin/bump-doc-version.ts
+++ b/cyborg/bin/bump-doc-version.ts
@@ -1,75 +0,0 @@
-/*
- * Copyright 2023 Greptime Team
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import * as core from "@actions/core";
-import {obtainClient} from "@/common";
-
-async function triggerWorkflow(workflowId: string, version: string) {
-  const docsClient = obtainClient("DOCS_REPO_TOKEN")
-  try {
-    await docsClient.rest.actions.createWorkflowDispatch({
-      owner: "GreptimeTeam",
-      repo: "docs",
-      workflow_id: workflowId,
-      ref: "main",
-      inputs: {
-        version,
-      },
-    });
-    console.log(`Successfully triggered ${workflowId} workflow with version ${version}`);
-  } catch (error) {
-    core.setFailed(`Failed to trigger workflow: ${error.message}`);
-  }
-}
-
-function determineWorkflow(version: string): [string, string] {
-  // Check if it's a nightly version
-  if (version.includes('nightly')) {
-    return ['bump-nightly-version.yml', version];
-  }
-
-  const parts = version.split('.');
-
-  if (parts.length !== 3) {
-    throw new Error('Invalid version format');
-  }
-
-  // If patch version (last number) is 0, it's a major version
-  // Return only major.minor version
-  if (parts[2] === '0') {
-    return ['bump-version.yml', `${parts[0]}.${parts[1]}`];
-  }
-
-  // Otherwise it's a patch version, use full version
-  return ['bump-patch-version.yml', version];
-}
-
-const version = process.env.VERSION;
-if (!version) {
-  core.setFailed("VERSION environment variable is required");
-  process.exit(1);
-}
-
-// Remove 'v' prefix if exists
-const cleanVersion = version.startsWith('v') ? version.slice(1) : version;
-
-try {
-  const [workflowId, apiVersion] = determineWorkflow(cleanVersion);
-  triggerWorkflow(workflowId, apiVersion);
-} catch (error) {
-  core.setFailed(`Error processing version: ${error.message}`);
-  process.exit(1);
-}
--- a/cyborg/bin/bump-versions.ts
+++ b/cyborg/bin/bump-versions.ts
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2023 Greptime Team
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import * as core from "@actions/core";
+import {obtainClient} from "@/common";
+
+interface RepoConfig {
+  tokenEnv: string;
+  repo: string;
+  workflowLogic: (version: string) => [string, string] | null;
+}
+
+const REPO_CONFIGS: Record<string, RepoConfig> = {
+  website: {
+    tokenEnv: "WEBSITE_REPO_TOKEN",
+    repo: "website",
+    workflowLogic: (version: string) => {
+      // Skip nightly versions for website
+      if (version.includes('nightly')) {
+        console.log('Nightly version detected for website, skipping workflow trigger.');
+        return null;
+      }
+      return ['bump-patch-version.yml', version];
+    }
+  },
+  demo: {
+    tokenEnv: "DEMO_REPO_TOKEN",
+    repo: "demo-scene",
+    workflowLogic: (version: string) => {
+      // Skip nightly versions for demo
+      if (version.includes('nightly')) {
+        console.log('Nightly version detected for demo, skipping workflow trigger.');
+        return null;
+      }
+      return ['bump-patch-version.yml', version];
+    }
+  },
+  docs: {
+    tokenEnv: "DOCS_REPO_TOKEN",
+    repo: "docs",
+    workflowLogic: (version: string) => {
+      // Check if it's a nightly version
+      if (version.includes('nightly')) {
+        return ['bump-nightly-version.yml', version];
+      }
+
+      const parts = version.split('.');
+      if (parts.length !== 3) {
+        throw new Error('Invalid version format');
+      }
+
+      // If patch version (last number) is 0, it's a major/minor release
+      // Return only major.minor version
+      if (parts[2] === '0') {
+        return ['bump-version.yml', `${parts[0]}.${parts[1]}`];
+      }
+
+      // Otherwise it's a patch version, use full version
+      return ['bump-patch-version.yml', version];
+    }
+  }
+};
+
+async function triggerWorkflow(repoConfig: RepoConfig, workflowId: string, version: string) {
+  const client = obtainClient(repoConfig.tokenEnv);
+  try {
+    await client.rest.actions.createWorkflowDispatch({
+      owner: "GreptimeTeam",
+      repo: repoConfig.repo,
+      workflow_id: workflowId,
+      ref: "main",
+      inputs: {
+        version,
+      },
+    });
+    console.log(`Successfully triggered ${workflowId} workflow for ${repoConfig.repo} with version ${version}`);
+  } catch (error) {
+    core.setFailed(`Failed to trigger workflow for ${repoConfig.repo}: ${error.message}`);
+    throw error;
+  }
+}
+
+async function processRepo(repoName: string, version: string) {
+  const repoConfig = REPO_CONFIGS[repoName];
+  if (!repoConfig) {
+    throw new Error(`Unknown repository: ${repoName}`);
+  }
+
+  try {
+    const workflowResult = repoConfig.workflowLogic(version);
+    if (workflowResult === null) {
+      // Skip this repo (e.g., nightly version for website)
+      return;
+    }
+
+    const [workflowId, apiVersion] = workflowResult;
+    await triggerWorkflow(repoConfig, workflowId, apiVersion);
+  } catch (error) {
+    core.setFailed(`Error processing ${repoName} with version ${version}: ${error.message}`);
+    throw error;
+  }
+}
+
+async function main() {
+  const version = process.env.VERSION;
+  if (!version) {
+    core.setFailed("VERSION environment variable is required");
+    process.exit(1);
+  }
+
+  // Remove 'v' prefix if exists
+  const cleanVersion = version.startsWith('v') ? version.slice(1) : version;
+
+  // Get target repositories from environment variable
+  // Defaults to the website and docs repositories if not specified
+  const targetRepos = process.env.TARGET_REPOS?.split(',').map(repo => repo.trim()) || ['website', 'docs'];
+
+  console.log(`Processing version ${cleanVersion} for repositories: ${targetRepos.join(', ')}`);
+
+  const errors: string[] = [];
+
+  // Process each repository
+  for (const repo of targetRepos) {
+    try {
+      await processRepo(repo, cleanVersion);
+    } catch (error) {
+      errors.push(`${repo}: ${error.message}`);
+    }
+  }
+
+  if (errors.length > 0) {
+    core.setFailed(`Failed to process some repositories: ${errors.join('; ')}`);
+    process.exit(1);
+  }
+
+  console.log('All repositories processed successfully');
+}
+
+// Execute main function
+main().catch((error) => {
+  core.setFailed(`Unexpected error: ${error.message}`);
+  process.exit(1);
+});
--- a/cyborg/bin/bump-website-version.ts
+++ b/cyborg/bin/bump-website-version.ts
@@ -1,57 +0,0 @@
-/*
- * Copyright 2023 Greptime Team
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import * as core from "@actions/core";
-import {obtainClient} from "@/common";
-
-async function triggerWorkflow(workflowId: string, version: string) {
-  const websiteClient = obtainClient("WEBSITE_REPO_TOKEN")
-  try {
-    await websiteClient.rest.actions.createWorkflowDispatch({
-      owner: "GreptimeTeam",
-      repo: "website",
-      workflow_id: workflowId,
-      ref: "main",
-      inputs: {
-        version,
-      },
-    });
-    console.log(`Successfully triggered ${workflowId} workflow with version ${version}`);
-  } catch (error) {
-    core.setFailed(`Failed to trigger workflow: ${error.message}`);
-  }
-}
-
-const version = process.env.VERSION;
-if (!version) {
-  core.setFailed("VERSION environment variable is required");
-  process.exit(1);
-}
-
-// Remove 'v' prefix if exists
-const cleanVersion = version.startsWith('v') ? version.slice(1) : version;
-
-if (cleanVersion.includes('nightly')) {
-  console.log('Nightly version detected, skipping workflow trigger.');
-  process.exit(0);
-}
-
-try {
-  triggerWorkflow('bump-patch-version.yml', cleanVersion);
-} catch (error) {
-  core.setFailed(`Error processing version: ${error.message}`);
-  process.exit(1);
-}
--- a/docs/how-to/how-to-profile-memory.md
+++ b/docs/how-to/how-to-profile-memory.md
@@ -1,6 +1,6 @@
 # Profile memory usage of GreptimeDB

-This crate provides an easy approach to dump memory profiling info. A set of ready to use scripts is provided in [docs/how-to/memory-profile-scripts](docs/how-to/memory-profile-scripts).
+This crate provides an easy approach to dump memory profiling info. A set of ready to use scripts is provided in [docs/how-to/memory-profile-scripts](./memory-profile-scripts/scripts).

 ## Prerequisites
 ### jemalloc
--- a/grafana/README.md
+++ b/grafana/README.md
@@ -9,7 +9,7 @@ We highly recommend using the self-monitoring feature provided by [GreptimeDB Op
 - **Metrics Dashboards**

  - `dashboards/metrics/cluster/dashboard.json`: The Grafana dashboard for the GreptimeDB cluster. Read the [dashboard.md](./dashboards/metrics/cluster/dashboard.md) for more details.
-  
+
  - `dashboards/metrics/standalone/dashboard.json`: The Grafana dashboard for the standalone GreptimeDB instance. **It's generated from the `cluster/dashboard.json` by removing the instance filter through the `make dashboards` command**. Read the [dashboard.md](./dashboards/metrics/standalone/dashboard.md) for more details.

 - **Logs Dashboard**
@@ -83,7 +83,7 @@ If you use the [Helm Chart](https://github.com/GreptimeTeam/helm-charts) to depl
 - `monitoring.enabled=true`: Deploys a standalone GreptimeDB instance dedicated to monitoring the cluster;
 - `grafana.enabled=true`: Deploys Grafana and automatically imports the monitoring dashboard;

-The standalone GreptimeDB instance will collect metrics from your cluster, and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/nightly/user-guide/deployments/deploy-on-kubernetes/getting-started).
+The standalone GreptimeDB instance will collect metrics from your cluster, and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/user-guide/deployments-administration/deploy-on-kubernetes/getting-started).

 ### Self-host Prometheus and import dashboards manually

--- a/grafana/dashboards/metrics/cluster/dashboard.json
+++ b/grafana/dashboards/metrics/cluster/dashboard.json
--- a/grafana/dashboards/metrics/cluster/dashboard.md
+++ b/grafana/dashboards/metrics/cluster/dashboard.md
@@ -60,7 +60,7 @@
 | Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
 | Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
 | Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
-| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
+| Compaction Elapsed Time per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))`<br/>`sum by(instance, pod, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{instance=~"$datanode"}[$__rate_interval]))/sum by(instance, pod, stage) (rate(greptime_mito_compaction_stage_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
 | Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
 | WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
 | Cached Bytes per Instance | `greptime_mito_cache_bytes{instance=~"$datanode"}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
@@ -69,7 +69,9 @@
 | Log Store op duration seconds | `histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))` | `timeseries` | Write-ahead log operations latency at p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99` |
 | Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
 | Compaction Input/Output Bytes | `sum by(instance, pod) (greptime_mito_compaction_input_bytes)`<br/>`sum by(instance, pod) (greptime_mito_compaction_output_bytes)` | `timeseries` | Compaction oinput output bytes | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-input` |
-| Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
+| Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`<br/>`sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
+| Active Series and Field Builders Count | `sum by(instance, pod) (greptime_mito_memtable_active_series_count)`<br/>`sum by(instance, pod) (greptime_mito_memtable_field_builder_count)` | `timeseries` | Number of active series and field builders in memtables, summed per instance. | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]-series` |
+| Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
 # OpenDAL
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
@@ -87,9 +89,19 @@
 # Metasrv
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
-| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `state-timeline` | Counter of region migration by source and destination | `prometheus` | `none` | `from-datanode-{{datanode_id}}` |
-| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `prometheus` | `none` | `__auto` |
-| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `__auto` |
+| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `status-history` | Counter of region migration by source and destination | `prometheus` | -- | `from-datanode-{{datanode_id}}` |
+| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `prometheus` | `none` | `{{pod}}-{{state}}-{{error_type}}` |
+| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `binBps` | `Datanode-{{datanode_id}}-writeload` |
+| Rate of SQL Executions (RDS) | `rate(greptime_meta_rds_pg_sql_execute_elapsed_ms_count[$__rate_interval])` | `timeseries` | Displays the rate of SQL executions processed by the Meta service using the RDS backend. | `prometheus` | `none` | `{{pod}} {{op}} {{type}} {{result}} ` |
+| SQL Execution Latency (RDS) | `histogram_quantile(0.90, sum by(pod, op, type, result, le) (rate(greptime_meta_rds_pg_sql_execute_elapsed_ms_bucket[$__rate_interval])))` | `timeseries` | Measures the response time of SQL executions via the RDS backend.  | `prometheus` | `ms` | `{{pod}} {{op}} {{type}} {{result}} p90` |
+| Handler Execution Latency | `histogram_quantile(0.90, sum by(pod, le, name) (
+  rate(greptime_meta_handler_execute_bucket[$__rate_interval])
+))` | `timeseries` | Shows latency of Meta handlers by pod and handler name, useful for monitoring handler performance and detecting latency spikes.<br/> | `prometheus` | `s` | `{{pod}} {{name}} p90` |
+| Heartbeat Packet Size | `histogram_quantile(0.9, sum by(pod, le) (greptime_meta_heartbeat_stat_memory_size_bucket))` | `timeseries` | Shows p90 heartbeat message sizes, helping track network usage and identify anomalies in heartbeat payload.<br/> | `prometheus` | `bytes` | `{{pod}}` |
+| Meta Heartbeat Receive Rate | `rate(greptime_meta_heartbeat_rate[$__rate_interval])` | `timeseries` | Rate of heartbeats received by metasrv. | `prometheus` | `s` | `{{pod}}` |
+| Meta KV Ops Latency | `histogram_quantile(0.99, sum by(pod, le, op, target) (greptime_meta_kv_request_elapsed_bucket))` | `timeseries` | P99 latency of metasrv KV requests by pod, operation and target. | `prometheus` | `s` | `{{pod}}-{{op}} p99` |
+| Rate of meta KV Ops | `rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])` | `timeseries` | Rate of metasrv KV requests. | `prometheus` | `none` | `{{pod}}-{{op}} p99` |
+| DDL Latency | `histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table))` | `timeseries` | P90 latency of metasrv DDL procedures (create tables, create table, create view, create flow, drop table, alter table) by step. | `prometheus` | `s` | `CreateLogicalTables-{{step}} p90` |
 # Flownode
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
--- a/grafana/dashboards/metrics/cluster/dashboard.yaml
+++ b/grafana/dashboards/metrics/cluster/dashboard.yaml
@@ -487,7 +487,7 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{pod}}]'
-        - title: Compaction P99 per Instance by Stage
+        - title: Compaction Elapsed Time per Instance by Stage
          type: timeseries
          description: Compaction latency by stage
          unit: s
@@ -497,6 +497,11 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
+            - expr: sum by(instance, pod, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{instance=~"$datanode"}[$__rate_interval]))/sum by(instance, pod, stage) (rate(greptime_mito_compaction_stage_elapsed_count{instance=~"$datanode"}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-avg'
        - title: Compaction P99 per Instance
          type: timeseries
          description: Compaction P99 per Instance.
@@ -602,7 +607,37 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-P95'
-            - expr: sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))
+            - expr: sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
+        - title: Active Series and Field Builders Count
+          type: timeseries
+          description: Number of active series and field builders in memtables, summed per instance.
+          unit: none
+          queries:
+            - expr: sum by(instance, pod) (greptime_mito_memtable_active_series_count)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-series'
+            - expr: sum by(instance, pod) (greptime_mito_memtable_field_builder_count)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-field_builders'
+        - title: Region Worker Convert Requests
+          type: timeseries
+          description: Per-stage elapsed time for region worker to decode requests.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-P95'
+            - expr: sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))
              datasource:
                type: prometheus
                uid: ${metrics}
@@ -721,9 +756,8 @@ groups:
    - title: Metasrv
      panels:
        - title: Region migration datanode
-          type: state-timeline
+          type: status-history
          description: Counter of region migration by source and destination
-          unit: none
          queries:
            - expr: greptime_meta_region_migration_stat{datanode_type="src"}
              datasource:
@@ -744,17 +778,127 @@ groups:
              datasource:
                type: prometheus
                uid: ${metrics}
-              legendFormat: __auto
+              legendFormat: '{{pod}}-{{state}}-{{error_type}}'
        - title: Datanode load
          type: timeseries
          description: Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads.
-          unit: none
+          unit: binBps
          queries:
            - expr: greptime_datanode_load
              datasource:
                type: prometheus
                uid: ${metrics}
-              legendFormat: __auto
+              legendFormat: Datanode-{{datanode_id}}-writeload
+        - title: Rate of SQL Executions (RDS)
+          type: timeseries
+          description: Displays the rate of SQL executions processed by the Meta service using the RDS backend.
+          unit: none
+          queries:
+            - expr: rate(greptime_meta_rds_pg_sql_execute_elapsed_ms_count[$__rate_interval])
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}} {{op}} {{type}} {{result}} '
+        - title: SQL Execution Latency (RDS)
+          type: timeseries
+          description: 'Measures the response time of SQL executions via the RDS backend. '
+          unit: ms
+          queries:
+            - expr: histogram_quantile(0.90, sum by(pod, op, type, result, le) (rate(greptime_meta_rds_pg_sql_execute_elapsed_ms_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}} {{op}} {{type}} {{result}} p90'
+        - title: Handler Execution Latency
+          type: timeseries
+          description: |
+            Shows latency of Meta handlers by pod and handler name, useful for monitoring handler performance and detecting latency spikes.
+          unit: s
+          queries:
+            - expr: |-
+                histogram_quantile(0.90, sum by(pod, le, name) (
+                  rate(greptime_meta_handler_execute_bucket[$__rate_interval])
+                ))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}} {{name}} p90'
+        - title: Heartbeat Packet Size
+          type: timeseries
+          description: |
+            Shows p90 heartbeat message sizes, helping track network usage and identify anomalies in heartbeat payload.
+          unit: bytes
+          queries:
+            - expr: histogram_quantile(0.9, sum by(pod, le) (greptime_meta_heartbeat_stat_memory_size_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}}'
+        - title: Meta Heartbeat Receive Rate
+          type: timeseries
+          description: Rate of heartbeats received by metasrv.
+          unit: s
+          queries:
+            - expr: rate(greptime_meta_heartbeat_rate[$__rate_interval])
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}}'
+        - title: Meta KV Ops Latency
+          type: timeseries
+          description: P99 latency of metasrv KV requests by pod, operation and target.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(pod, le, op, target) (greptime_meta_kv_request_elapsed_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}}-{{op}} p99'
+        - title: Rate of meta KV Ops
+          type: timeseries
+          description: Rate of metasrv KV requests.
+          unit: none
+          queries:
+            - expr: rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}}-{{op}} p99'
+        # P90 latency of each metasrv DDL procedure, broken down by step.
+        # histogram_quantile() works on the `_bucket` series (the ones labelled
+        # with `le`), so every query below selects the `_bucket` metric.
+        - title: DDL Latency
+          type: timeseries
+          description: P90 latency of metasrv DDL procedures (create tables, create table, create view, create flow, drop table, alter table) by step.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: CreateLogicalTables-{{step}} p90
+            - expr: histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: CreateTable-{{step}} p90
+            - expr: histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: CreateView-{{step}} p90
+            - expr: histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: CreateFlow-{{step}} p90
+            - expr: histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: DropTable-{{step}} p90
+            - expr: histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: AlterTable-{{step}} p90
    - title: Flownode
      panels:
        - title: Flow Ingest / Output Rate
--- a/grafana/dashboards/metrics/standalone/dashboard.json
+++ b/grafana/dashboards/metrics/standalone/dashboard.json
--- a/grafana/dashboards/metrics/standalone/dashboard.md
+++ b/grafana/dashboards/metrics/standalone/dashboard.md
@@ -60,7 +60,7 @@
 | Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
 | Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
 | Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
-| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
+| Compaction Elapsed Time per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))`<br/>`sum by(instance, pod, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{}[$__rate_interval]))/sum by(instance, pod, stage) (rate(greptime_mito_compaction_stage_elapsed_count{}[$__rate_interval]))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
 | Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
 | WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
 | Cached Bytes per Instance | `greptime_mito_cache_bytes{}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
@@ -69,7 +69,9 @@
 | Log Store op duration seconds | `histogram_quantile(0.99, sum by(le,logstore,optype,instance, pod) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))` | `timeseries` | Write-ahead log operations latency at p99 | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{logstore}}]-[{{optype}}]-p99` |
 | Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
 | Compaction Input/Output Bytes | `sum by(instance, pod) (greptime_mito_compaction_input_bytes)`<br/>`sum by(instance, pod) (greptime_mito_compaction_output_bytes)` | `timeseries` | Compaction oinput output bytes | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-input` |
-| Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
+| Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`<br/>`sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
+| Active Series and Field Builders Count | `sum by(instance, pod) (greptime_mito_memtable_active_series_count)`<br/>`sum by(instance, pod) (greptime_mito_memtable_field_builder_count)` | `timeseries` | Number of active series and field builders in memtables, summed per instance. | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]-series` |
+| Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
 # OpenDAL
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
@@ -87,9 +89,19 @@
 # Metasrv
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
-| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `state-timeline` | Counter of region migration by source and destination | `prometheus` | `none` | `from-datanode-{{datanode_id}}` |
-| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `prometheus` | `none` | `__auto` |
-| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `__auto` |
+| Region migration datanode | `greptime_meta_region_migration_stat{datanode_type="src"}`<br/>`greptime_meta_region_migration_stat{datanode_type="desc"}` | `status-history` | Counter of region migration by source and destination | `prometheus` | -- | `from-datanode-{{datanode_id}}` |
+| Region migration error | `greptime_meta_region_migration_error` | `timeseries` | Counter of region migration error | `prometheus` | `none` | `{{pod}}-{{state}}-{{error_type}}` |
+| Datanode load | `greptime_datanode_load` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `binBps` | `Datanode-{{datanode_id}}-writeload` |
+| Rate of SQL Executions (RDS) | `rate(greptime_meta_rds_pg_sql_execute_elapsed_ms_count[$__rate_interval])` | `timeseries` | Displays the rate of SQL executions processed by the Meta service using the RDS backend. | `prometheus` | `none` | `{{pod}} {{op}} {{type}} {{result}} ` |
+| SQL Execution Latency (RDS) | `histogram_quantile(0.90, sum by(pod, op, type, result, le) (rate(greptime_meta_rds_pg_sql_execute_elapsed_ms_bucket[$__rate_interval])))` | `timeseries` | Measures the response time of SQL executions via the RDS backend.  | `prometheus` | `ms` | `{{pod}} {{op}} {{type}} {{result}} p90` |
+| Handler Execution Latency | `histogram_quantile(0.90, sum by(pod, le, name) (
+  rate(greptime_meta_handler_execute_bucket[$__rate_interval])
+))` | `timeseries` | Shows latency of Meta handlers by pod and handler name, useful for monitoring handler performance and detecting latency spikes.<br/> | `prometheus` | `s` | `{{pod}} {{name}} p90` |
+| Heartbeat Packet Size | `histogram_quantile(0.9, sum by(pod, le) (greptime_meta_heartbeat_stat_memory_size_bucket))` | `timeseries` | Shows p90 heartbeat message sizes, helping track network usage and identify anomalies in heartbeat payload.<br/> | `prometheus` | `bytes` | `{{pod}}` |
+| Meta Heartbeat Receive Rate | `rate(greptime_meta_heartbeat_rate[$__rate_interval])` | `timeseries` | Per-second rate of heartbeats received by metasrv. | `prometheus` | `s` | `{{pod}}` |
+| Meta KV Ops Latency | `histogram_quantile(0.99, sum by(pod, le, op, target) (greptime_meta_kv_request_elapsed_bucket))` | `timeseries` | p99 latency of metasrv KV requests, by pod, operation and target. | `prometheus` | `s` | `{{pod}}-{{op}} p99` |
+| Rate of meta KV Ops | `rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])` | `timeseries` | Per-second rate of metasrv KV requests. | `prometheus` | `none` | `{{pod}}-{{op}} p99` |
+| DDL Latency | `histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table))` | `timeseries` | p90 latency of metasrv DDL procedures (create tables, create table, create view, create flow, drop table, alter table), by step. | `prometheus` | `s` | `CreateLogicalTables-{{step}} p90` |
 # Flownode
 | Title | Query | Type | Description | Datasource | Unit | Legend Format |
 | --- | --- | --- | --- | --- | --- | --- |
--- a/grafana/dashboards/metrics/standalone/dashboard.yaml
+++ b/grafana/dashboards/metrics/standalone/dashboard.yaml
@@ -487,7 +487,7 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{ instance }}]-[{{pod}}]'
-        - title: Compaction P99 per Instance by Stage
+        - title: Compaction Elapsed Time per Instance by Stage
          type: timeseries
          description: Compaction latency by stage
          unit: s
@@ -497,6 +497,11 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
+            - expr: sum by(instance, pod, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{}[$__rate_interval]))/sum by(instance, pod, stage) (rate(greptime_mito_compaction_stage_elapsed_count{}[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-avg'
        - title: Compaction P99 per Instance
          type: timeseries
          description: Compaction P99 per Instance.
@@ -602,7 +607,37 @@ groups:
                type: prometheus
                uid: ${metrics}
              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-P95'
-            - expr: sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))
+            - expr: sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
+        - title: Active Series and Field Builders Count
+          type: timeseries
+          description: Number of active series and field builders in memtables, summed per instance and pod.
+          unit: none
+          queries:
+            - expr: sum by(instance, pod) (greptime_mito_memtable_active_series_count)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-series'
+            - expr: sum by(instance, pod) (greptime_mito_memtable_field_builder_count)
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-field_builders'
+        - title: Region Worker Convert Requests
+          type: timeseries
+          description: Per-stage elapsed time for region worker to decode requests.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-P95'
+            - expr: sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))
              datasource:
                type: prometheus
                uid: ${metrics}
@@ -721,9 +756,8 @@ groups:
    - title: Metasrv
      panels:
        - title: Region migration datanode
-          type: state-timeline
+          type: status-history
          description: Counter of region migration by source and destination
-          unit: none
          queries:
            - expr: greptime_meta_region_migration_stat{datanode_type="src"}
              datasource:
@@ -744,17 +778,127 @@ groups:
              datasource:
                type: prometheus
                uid: ${metrics}
-              legendFormat: __auto
+              legendFormat: '{{pod}}-{{state}}-{{error_type}}'
        - title: Datanode load
          type: timeseries
          description: Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads.
-          unit: none
+          unit: binBps
          queries:
            - expr: greptime_datanode_load
              datasource:
                type: prometheus
                uid: ${metrics}
-              legendFormat: __auto
+              legendFormat: Datanode-{{datanode_id}}-writeload
+        - title: Rate of SQL Executions (RDS)
+          type: timeseries
+          description: Displays the rate of SQL executions processed by the Meta service using the RDS backend.
+          unit: none
+          queries:
+            - expr: rate(greptime_meta_rds_pg_sql_execute_elapsed_ms_count[$__rate_interval])
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}} {{op}} {{type}} {{result}} '
+        - title: SQL Execution Latency (RDS)
+          type: timeseries
+          description: 'Measures the response time of SQL executions via the RDS backend. '
+          unit: ms
+          queries:
+            - expr: histogram_quantile(0.90, sum by(pod, op, type, result, le) (rate(greptime_meta_rds_pg_sql_execute_elapsed_ms_bucket[$__rate_interval])))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}} {{op}} {{type}} {{result}} p90'
+        - title: Handler Execution Latency
+          type: timeseries
+          description: |
+            Shows latency of Meta handlers by pod and handler name, useful for monitoring handler performance and detecting latency spikes.
+          unit: s
+          queries:
+            - expr: |-
+                histogram_quantile(0.90, sum by(pod, le, name) (
+                  rate(greptime_meta_handler_execute_bucket[$__rate_interval])
+                ))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}} {{name}} p90'
+        - title: Heartbeat Packet Size
+          type: timeseries
+          description: |
+            Shows p90 heartbeat message sizes, helping track network usage and identify anomalies in heartbeat payload.
+          unit: bytes
+          queries:
+            - expr: histogram_quantile(0.9, sum by(pod, le) (greptime_meta_heartbeat_stat_memory_size_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}}'
+        - title: Meta Heartbeat Receive Rate
+          type: timeseries
+          description: Per-second rate of heartbeats received by metasrv.
+          unit: s
+          queries:
+            - expr: rate(greptime_meta_heartbeat_rate[$__rate_interval])
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}}'
+        - title: Meta KV Ops Latency
+          type: timeseries
+          description: p99 latency of metasrv KV requests, by pod, operation and target.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.99, sum by(pod, le, op, target) (greptime_meta_kv_request_elapsed_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}}-{{op}} p99'
+        - title: Rate of meta KV Ops
+          type: timeseries
+          description: Per-second rate of metasrv KV requests.
+          unit: none
+          queries:
+            - expr: rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: '{{pod}}-{{op}} p99'
+        - title: DDL Latency
+          type: timeseries
+          description: p90 latency of metasrv DDL procedures (create tables, create table, create view, create flow, drop table, alter table), by step.
+          unit: s
+          queries:
+            - expr: histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: CreateLogicalTables-{{step}} p90
+            - expr: histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: CreateTable-{{step}} p90
+            - expr: histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: CreateView-{{step}} p90
+            - expr: histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: CreateFlow-{{step}} p90
+            - expr: histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: DropTable-{{step}} p90
+            - expr: histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table_bucket))
+              datasource:
+                type: prometheus
+                uid: ${metrics}
+              legendFormat: AlterTable-{{step}} p90
    - title: Flownode
      panels:
        - title: Flow Ingest / Output Rate
--- a/grafana/scripts/gen-dashboards.sh
+++ b/grafana/scripts/gen-dashboards.sh
@@ -6,7 +6,7 @@ DAC_IMAGE=ghcr.io/zyy17/dac:20250423-522bd35

 remove_instance_filters() {
  # Remove the instance filters for the standalone dashboards.
-  sed 's/instance=~\\"$datanode\\",//; s/instance=~\\"$datanode\\"//; s/instance=~\\"$frontend\\",//; s/instance=~\\"$frontend\\"//; s/instance=~\\"$metasrv\\",//; s/instance=~\\"$metasrv\\"//; s/instance=~\\"$flownode\\",//; s/instance=~\\"$flownode\\"//;' $CLUSTER_DASHBOARD_DIR/dashboard.json > $STANDALONE_DASHBOARD_DIR/dashboard.json
+  sed -E 's/instance=~\\"(\$datanode|\$frontend|\$metasrv|\$flownode)\\",?//g' "$CLUSTER_DASHBOARD_DIR/dashboard.json" > "$STANDALONE_DASHBOARD_DIR/dashboard.json"
 }

 generate_intermediate_dashboards_and_docs() {
--- a/licenserc.toml
+++ b/licenserc.toml
@@ -27,8 +27,11 @@ excludes = [
    "src/servers/src/repeated_field.rs",
    "src/servers/src/http/test_helpers.rs",
    # enterprise
+    "src/common/meta/src/rpc/ddl/trigger.rs",
+    "src/operator/src/expr_helper/trigger.rs",
    "src/sql/src/statements/create/trigger.rs",
    "src/sql/src/statements/show/trigger.rs",
+    "src/sql/src/statements/drop/trigger.rs",
    "src/sql/src/parsers/create_parser/trigger.rs",
    "src/sql/src/parsers/show_parser/trigger.rs",
 ]
--- a/src/api/src/region.rs
+++ b/src/api/src/region.rs
@@ -22,6 +22,7 @@ use greptime_proto::v1::region::RegionResponse as RegionResponseV1;
 pub struct RegionResponse {
    pub affected_rows: AffectedRows,
    pub extensions: HashMap<String, Vec<u8>>,
+    pub metadata: Vec<u8>,
 }

 impl RegionResponse {
@@ -29,6 +30,7 @@ impl RegionResponse {
        Self {
            affected_rows: region_response.affected_rows as _,
            extensions: region_response.extensions,
+            metadata: region_response.metadata,
        }
    }

@@ -37,6 +39,16 @@ impl RegionResponse {
        Self {
            affected_rows,
            extensions: Default::default(),
+            metadata: Vec::new(),
+        }
+    }
+
+    /// Creates one response with metadata.
+    pub fn from_metadata(metadata: Vec<u8>) -> Self {
+        Self {
+            affected_rows: 0,
+            extensions: Default::default(),
+            metadata,
        }
    }
 }
--- a/src/catalog/Cargo.toml
+++ b/src/catalog/Cargo.toml
@@ -17,8 +17,10 @@ arrow-schema.workspace = true
 async-stream.workspace = true
 async-trait.workspace = true
 bytes.workspace = true
+common-base.workspace = true
 common-catalog.workspace = true
 common-error.workspace = true
+common-frontend.workspace = true
 common-macro.workspace = true
 common-meta.workspace = true
 common-procedure.workspace = true
--- a/src/catalog/src/error.rs
+++ b/src/catalog/src/error.rs
@@ -277,6 +277,26 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },
+
+    #[snafu(display("Failed to invoke frontend services"))]
+    InvokeFrontend {
+        source: common_frontend::error::Error,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Meta client is not provided"))]
+    MetaClientMissing {
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Failed to find frontend node: {}", addr))]
+    FrontendNotFound {
+        addr: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
 }

 impl Error {
@@ -345,6 +365,10 @@ impl ErrorExt for Error {
            Error::GetViewCache { source, .. } | Error::GetTableCache { source, .. } => {
                source.status_code()
            }
+            Error::InvokeFrontend { source, .. } => source.status_code(),
+            Error::FrontendNotFound { .. } | Error::MetaClientMissing { .. } => {
+                StatusCode::Unexpected
+            }
        }
    }

--- a/src/catalog/src/kvbackend/manager.rs
+++ b/src/catalog/src/kvbackend/manager.rs
@@ -22,7 +22,9 @@ use common_catalog::consts::{
    PG_CATALOG_NAME,
 };
 use common_error::ext::BoxedError;
-use common_meta::cache::{LayeredCacheRegistryRef, ViewInfoCacheRef};
+use common_meta::cache::{
+    LayeredCacheRegistryRef, TableRoute, TableRouteCacheRef, ViewInfoCacheRef,
+};
 use common_meta::key::catalog_name::CatalogNameKey;
 use common_meta::key::flow::FlowMetadataManager;
 use common_meta::key::schema_name::SchemaNameKey;
@@ -51,6 +53,7 @@ use crate::error::{
 };
 use crate::information_schema::{InformationExtensionRef, InformationSchemaProvider};
 use crate::kvbackend::TableCacheRef;
+use crate::process_manager::ProcessManagerRef;
 use crate::system_schema::pg_catalog::PGCatalogProvider;
 use crate::system_schema::SystemSchemaProvider;
 use crate::CatalogManager;
@@ -84,6 +87,7 @@ impl KvBackendCatalogManager {
        backend: KvBackendRef,
        cache_registry: LayeredCacheRegistryRef,
        procedure_manager: Option<ProcedureManagerRef>,
+        process_manager: Option<ProcessManagerRef>,
    ) -> Arc<Self> {
        Arc::new_cyclic(|me| Self {
            information_extension,
@@ -102,12 +106,14 @@ impl KvBackendCatalogManager {
                    DEFAULT_CATALOG_NAME.to_string(),
                    me.clone(),
                    Arc::new(FlowMetadataManager::new(backend.clone())),
+                    process_manager.clone(),
                )),
                pg_catalog_provider: Arc::new(PGCatalogProvider::new(
                    DEFAULT_CATALOG_NAME.to_string(),
                    me.clone(),
                )),
                backend,
+                process_manager,
            },
            cache_registry,
            procedure_manager,
@@ -262,16 +268,68 @@ impl CatalogManager for KvBackendCatalogManager {
        let table_cache: TableCacheRef = self.cache_registry.get().context(CacheNotFoundSnafu {
            name: "table_cache",
        })?;
-        if let Some(table) = table_cache
+        let table_route_cache: TableRouteCacheRef =
+            self.cache_registry.get().context(CacheNotFoundSnafu {
+                name: "table_route_cache",
+            })?;
+        let table = table_cache
            .get_by_ref(&TableName {
                catalog_name: catalog_name.to_string(),
                schema_name: schema_name.to_string(),
                table_name: table_name.to_string(),
            })
            .await
-            .context(GetTableCacheSnafu)?
+            .context(GetTableCacheSnafu)?;
+
+        // Override logical table's partition key indices with physical table's.
+        if let Some(table) = &table
+            && let Some(table_route_value) = table_route_cache
+                .get(table.table_info().table_id())
+                .await
+                .context(TableMetadataManagerSnafu)?
+            && let TableRoute::Logical(logical_route) = &*table_route_value
+            && let Some(physical_table_info_value) = self
+                .table_metadata_manager
+                .table_info_manager()
+                .get(logical_route.physical_table_id())
+                .await
+                .context(TableMetadataManagerSnafu)?
        {
-            return Ok(Some(table));
+            let mut new_table_info = (*table.table_info()).clone();
+            // Gather all column names from the logical table
+            let logical_column_names: std::collections::HashSet<_> = new_table_info
+                .meta
+                .schema
+                .column_schemas()
+                .iter()
+                .map(|col| &col.name)
+                .collect();
+
+            // Only preserve partition key indices where the corresponding columns exist in logical table
+            new_table_info.meta.partition_key_indices = physical_table_info_value
+                .table_info
+                .meta
+                .partition_key_indices
+                .iter()
+                .filter(|&&index| {
+                    if let Some(physical_column) = physical_table_info_value
+                        .table_info
+                        .meta
+                        .schema
+                        .column_schemas
+                        .get(index)
+                    {
+                        logical_column_names.contains(&physical_column.name)
+                    } else {
+                        false
+                    }
+                })
+                .cloned()
+                .collect();
+
+            let new_table = DistTable::table(Arc::new(new_table_info));
+
+            return Ok(Some(new_table));
        }

        if channel == Channel::Postgres {
@@ -284,7 +342,7 @@ impl CatalogManager for KvBackendCatalogManager {
            }
        }

-        return Ok(None);
+        Ok(table)
    }

    async fn tables_by_ids(
@@ -419,6 +477,7 @@ struct SystemCatalog {
    information_schema_provider: Arc<InformationSchemaProvider>,
    pg_catalog_provider: Arc<PGCatalogProvider>,
    backend: KvBackendRef,
+    process_manager: Option<ProcessManagerRef>,
 }

 impl SystemCatalog {
@@ -486,6 +545,7 @@ impl SystemCatalog {
                        catalog.to_string(),
                        self.catalog_manager.clone(),
                        Arc::new(FlowMetadataManager::new(self.backend.clone())),
+                        self.process_manager.clone(),
                    ))
                });
            information_schema_provider.table(table_name)
--- a/src/catalog/src/lib.rs
+++ b/src/catalog/src/lib.rs
@@ -14,6 +14,7 @@

 #![feature(assert_matches)]
 #![feature(try_blocks)]
+#![feature(let_chains)]

 use std::any::Any;
 use std::fmt::{Debug, Formatter};
@@ -40,6 +41,7 @@ pub mod information_schema {
    pub use crate::system_schema::information_schema::*;
 }

+pub mod process_manager;
 pub mod table_source;

 #[async_trait::async_trait]
--- a/src/catalog/src/memory/manager.rs
+++ b/src/catalog/src/memory/manager.rs
@@ -356,6 +356,7 @@ impl MemoryCatalogManager {
            catalog,
            Arc::downgrade(self) as Weak<dyn CatalogManager>,
            Arc::new(FlowMetadataManager::new(Arc::new(MemoryKvBackend::new()))),
+            None, // we don't need ProcessManager on regions server.
        );
        let information_schema = information_schema_provider.tables().clone();

--- a/src/catalog/src/metrics.rs
+++ b/src/catalog/src/metrics.rs
@@ -34,4 +34,20 @@ lazy_static! {
        register_histogram!("greptime_catalog_kv_get", "catalog kv get").unwrap();
    pub static ref METRIC_CATALOG_KV_BATCH_GET: Histogram =
        register_histogram!("greptime_catalog_kv_batch_get", "catalog kv batch get").unwrap();
+
+    /// Count of running processes in each catalog.
+    pub static ref PROCESS_LIST_COUNT: IntGaugeVec = register_int_gauge_vec!(
+        "greptime_process_list_count",
+        "Running process count per catalog",
+        &["catalog"]
+    )
+    .unwrap();
+
+    /// Count of killed processes in each catalog.
+    pub static ref PROCESS_KILL_COUNT: IntCounterVec = register_int_counter_vec!(
+        "greptime_process_kill_count",
+        "Completed kill process requests count",
+        &["catalog"]
+    )
+    .unwrap();
 }
--- a/src/catalog/src/process_manager.rs
+++ b/src/catalog/src/process_manager.rs
@@ -0,0 +1,494 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::hash_map::Entry;
+use std::collections::HashMap;
+use std::fmt::{Debug, Formatter};
+use std::sync::atomic::{AtomicU32, Ordering};
+use std::sync::{Arc, RwLock};
+
+use api::v1::frontend::{KillProcessRequest, ListProcessRequest, ProcessInfo};
+use common_base::cancellation::CancellationHandle;
+use common_frontend::selector::{FrontendSelector, MetaClientSelector};
+use common_telemetry::{debug, info};
+use common_time::util::current_time_millis;
+use meta_client::MetaClientRef;
+use snafu::{ensure, OptionExt, ResultExt};
+
+use crate::error;
+use crate::metrics::{PROCESS_KILL_COUNT, PROCESS_LIST_COUNT};
+
+pub type ProcessId = u32;
+pub type ProcessManagerRef = Arc<ProcessManager>;
+
+/// Query process manager.
+pub struct ProcessManager {
+    /// Local frontend server address.
+    server_addr: String,
+    /// Next process id for local queries.
+    next_id: AtomicU32,
+    /// Running processes per catalog.
+    catalogs: RwLock<HashMap<String, HashMap<ProcessId, CancellableProcess>>>,
+    /// Frontend selector to locate frontend nodes.
+    frontend_selector: Option<MetaClientSelector>,
+}
+
+impl ProcessManager {
+    /// Creates a [ProcessManager] with the server address and an optional meta client.
+    pub fn new(server_addr: String, meta_client: Option<MetaClientRef>) -> Self {
+        let frontend_selector = meta_client.map(MetaClientSelector::new);
+        Self {
+            server_addr,
+            next_id: Default::default(),
+            catalogs: Default::default(),
+            frontend_selector,
+        }
+    }
+}
+
+impl ProcessManager {
+    /// Registers a submitted query. Use the provided id if present.
+    #[must_use]
+    pub fn register_query(
+        self: &Arc<Self>,
+        catalog: String,
+        schemas: Vec<String>,
+        query: String,
+        client: String,
+        query_id: Option<ProcessId>,
+    ) -> Ticket {
+        let id = query_id.unwrap_or_else(|| self.next_id.fetch_add(1, Ordering::Relaxed));
+        let process = ProcessInfo {
+            id,
+            catalog: catalog.clone(),
+            schemas,
+            query,
+            start_timestamp: current_time_millis(),
+            client,
+            frontend: self.server_addr.clone(),
+        };
+        let cancellation_handle = Arc::new(CancellationHandle::default());
+        let cancellable_process = CancellableProcess::new(cancellation_handle.clone(), process);
+
+        self.catalogs
+            .write()
+            .unwrap()
+            .entry(catalog.clone())
+            .or_default()
+            .insert(id, cancellable_process);
+
+        Ticket {
+            catalog,
+            manager: self.clone(),
+            id,
+            cancellation_handle,
+        }
+    }
+
+    /// Generates the next process id.
+    pub fn next_id(&self) -> u32 {
+        self.next_id.fetch_add(1, Ordering::Relaxed)
+    }
+
+    /// De-register a query from process list.
+    pub fn deregister_query(&self, catalog: String, id: ProcessId) {
+        if let Entry::Occupied(mut o) = self.catalogs.write().unwrap().entry(catalog) {
+            let process = o.get_mut().remove(&id);
+            debug!("Deregister process: {:?}", process);
+            if o.get().is_empty() {
+                o.remove();
+            }
+        }
+    }
+
+    /// Lists local running processes in the given catalog, or in all catalogs if `None`.
+    pub fn local_processes(&self, catalog: Option<&str>) -> error::Result<Vec<ProcessInfo>> {
+        let catalogs = self.catalogs.read().unwrap();
+        let result = if let Some(catalog) = catalog {
+            if let Some(catalogs) = catalogs.get(catalog) {
+                catalogs.values().map(|p| p.process.clone()).collect()
+            } else {
+                vec![]
+            }
+        } else {
+            catalogs
+                .values()
+                .flat_map(|v| v.values().map(|p| p.process.clone()))
+                .collect()
+        };
+        Ok(result)
+    }
+
+    pub async fn list_all_processes(
+        &self,
+        catalog: Option<&str>,
+    ) -> error::Result<Vec<ProcessInfo>> {
+        let mut processes = vec![];
+        if let Some(remote_frontend_selector) = self.frontend_selector.as_ref() {
+            let frontends = remote_frontend_selector
+                .select(|node| node.peer.addr != self.server_addr)
+                .await
+                .context(error::InvokeFrontendSnafu)?;
+            for mut f in frontends {
+                processes.extend(
+                    f.list_process(ListProcessRequest {
+                        catalog: catalog.unwrap_or_default().to_string(),
+                    })
+                    .await
+                    .context(error::InvokeFrontendSnafu)?
+                    .processes,
+                );
+            }
+        }
+        processes.extend(self.local_processes(catalog)?);
+        Ok(processes)
+    }
+
+    /// Kills the query `id` of `catalog` on the frontend at `server_addr`, local or remote.
+    pub async fn kill_process(
+        &self,
+        server_addr: String,
+        catalog: String,
+        id: ProcessId,
+    ) -> error::Result<bool> {
+        if server_addr == self.server_addr {
+            return self.kill_local_process(catalog, id).await;
+        }
+        let selector = self
+            .frontend_selector
+            .as_ref()
+            .context(error::MetaClientMissingSnafu)?;
+        let mut targets = selector
+            .select(|node| node.peer.addr == server_addr)
+            .await
+            .context(error::InvokeFrontendSnafu)?;
+        ensure!(
+            !targets.is_empty(),
+            error::FrontendNotFoundSnafu { addr: server_addr }
+        );
+        let request = KillProcessRequest {
+            server_addr,
+            catalog,
+            process_id: id,
+        };
+        targets[0]
+            .kill_process(request)
+            .await
+            .context(error::InvokeFrontendSnafu)?;
+        // The response body is not inspected: a successful RPC is reported as a kill.
+        Ok(true)
+    }
+
+    /// Cancels and unregisters the local query `id` of `catalog`; `Ok(false)` if it is unknown.
+    pub async fn kill_local_process(&self, catalog: String, id: ProcessId) -> error::Result<bool> {
+        // The write lock is held until the function returns; nothing awaits while it is held.
+        let mut guard = self.catalogs.write().unwrap();
+        let Some(processes) = guard.get_mut(&catalog) else {
+            debug!("Failed to kill process, catalog not found: {}", catalog);
+            return Ok(false);
+        };
+        let Some(victim) = processes.remove(&id) else {
+            debug!("Failed to kill process, id not found: {}", id);
+            return Ok(false);
+        };
+        victim.handle.cancel();
+        info!(
+            "Killed process, catalog: {}, id: {:?}",
+            victim.process.catalog, victim.process.id
+        );
+        PROCESS_KILL_COUNT.with_label_values(&[&catalog]).inc();
+        Ok(true)
+    }
+}
+
+pub struct Ticket {
+    pub(crate) catalog: String,
+    pub(crate) manager: ProcessManagerRef,
+    pub(crate) id: ProcessId,
+    pub cancellation_handle: Arc<CancellationHandle>,
+}
+
+impl Drop for Ticket {
+    fn drop(&mut self) {
+        self.manager
+            .deregister_query(std::mem::take(&mut self.catalog), self.id);
+    }
+}
+
+struct CancellableProcess {
+    handle: Arc<CancellationHandle>,
+    process: ProcessInfo,
+}
+
+impl Drop for CancellableProcess {
+    fn drop(&mut self) {
+        PROCESS_LIST_COUNT
+            .with_label_values(&[&self.process.catalog])
+            .dec();
+    }
+}
+
+impl CancellableProcess {
+    fn new(handle: Arc<CancellationHandle>, process: ProcessInfo) -> Self {
+        PROCESS_LIST_COUNT
+            .with_label_values(&[&process.catalog])
+            .inc();
+        Self { handle, process }
+    }
+}
+
+impl Debug for CancellableProcess {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("CancellableProcess")
+            .field("cancelled", &self.handle.is_cancelled())
+            .field("process", &self.process)
+            .finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use crate::process_manager::ProcessManager;
+
+    #[tokio::test]
+    async fn test_register_query() {
+        let process_manager = Arc::new(ProcessManager::new("127.0.0.1:8000".to_string(), None));
+        let ticket = process_manager.clone().register_query(
+            "public".to_string(),
+            vec!["test".to_string()],
+            "SELECT * FROM table".to_string(),
+            "".to_string(),
+            None,
+        );
+
+        let running_processes = process_manager.local_processes(None).unwrap();
+        assert_eq!(running_processes.len(), 1);
+        assert_eq!(&running_processes[0].frontend, "127.0.0.1:8000");
+        assert_eq!(running_processes[0].id, ticket.id);
+        assert_eq!(&running_processes[0].query, "SELECT * FROM table");
+
+        drop(ticket);
+        assert_eq!(process_manager.local_processes(None).unwrap().len(), 0);
+    }
+
+    #[tokio::test]
+    async fn test_register_query_with_custom_id() {
+        let process_manager = Arc::new(ProcessManager::new("127.0.0.1:8000".to_string(), None));
+        let custom_id = 12345;
+
+        let ticket = process_manager.clone().register_query(
+            "public".to_string(),
+            vec!["test".to_string()],
+            "SELECT * FROM table".to_string(),
+            "client1".to_string(),
+            Some(custom_id),
+        );
+
+        assert_eq!(ticket.id, custom_id);
+
+        let running_processes = process_manager.local_processes(None).unwrap();
+        assert_eq!(running_processes.len(), 1);
+        assert_eq!(running_processes[0].id, custom_id);
+        assert_eq!(&running_processes[0].client, "client1");
+    }
+
+    #[tokio::test]
+    async fn test_multiple_queries_same_catalog() {
+        let process_manager = Arc::new(ProcessManager::new("127.0.0.1:8000".to_string(), None));
+
+        let ticket1 = process_manager.clone().register_query(
+            "public".to_string(),
+            vec!["schema1".to_string()],
+            "SELECT * FROM table1".to_string(),
+            "client1".to_string(),
+            None,
+        );
+
+        let ticket2 = process_manager.clone().register_query(
+            "public".to_string(),
+            vec!["schema2".to_string()],
+            "SELECT * FROM table2".to_string(),
+            "client2".to_string(),
+            None,
+        );
+
+        let running_processes = process_manager.local_processes(Some("public")).unwrap();
+        assert_eq!(running_processes.len(), 2);
+
+        // Verify both processes are present
+        let ids: Vec<u32> = running_processes.iter().map(|p| p.id).collect();
+        assert!(ids.contains(&ticket1.id));
+        assert!(ids.contains(&ticket2.id));
+    }
+
+    #[tokio::test]
+    async fn test_multiple_catalogs() {
+        let process_manager = Arc::new(ProcessManager::new("127.0.0.1:8000".to_string(), None));
+
+        let _ticket1 = process_manager.clone().register_query(
+            "catalog1".to_string(),
+            vec!["schema1".to_string()],
+            "SELECT * FROM table1".to_string(),
+            "client1".to_string(),
+            None,
+        );
+
+        let _ticket2 = process_manager.clone().register_query(
+            "catalog2".to_string(),
+            vec!["schema2".to_string()],
+            "SELECT * FROM table2".to_string(),
+            "client2".to_string(),
+            None,
+        );
+
+        // Test listing processes for specific catalog
+        let catalog1_processes = process_manager.local_processes(Some("catalog1")).unwrap();
+        assert_eq!(catalog1_processes.len(), 1);
+        assert_eq!(&catalog1_processes[0].catalog, "catalog1");
+
+        let catalog2_processes = process_manager.local_processes(Some("catalog2")).unwrap();
+        assert_eq!(catalog2_processes.len(), 1);
+        assert_eq!(&catalog2_processes[0].catalog, "catalog2");
+
+        // Test listing all processes
+        let all_processes = process_manager.local_processes(None).unwrap();
+        assert_eq!(all_processes.len(), 2);
+    }
+
+    #[tokio::test]
+    async fn test_deregister_query() {
+        let process_manager = Arc::new(ProcessManager::new("127.0.0.1:8000".to_string(), None));
+
+        let ticket = process_manager.clone().register_query(
+            "public".to_string(),
+            vec!["test".to_string()],
+            "SELECT * FROM table".to_string(),
+            "client1".to_string(),
+            None,
+        );
+        assert_eq!(process_manager.local_processes(None).unwrap().len(), 1);
+        process_manager.deregister_query("public".to_string(), ticket.id);
+        assert_eq!(process_manager.local_processes(None).unwrap().len(), 0);
+    }
+
+    #[tokio::test]
+    async fn test_cancellation_handle() {
+        let process_manager = Arc::new(ProcessManager::new("127.0.0.1:8000".to_string(), None));
+
+        let ticket = process_manager.clone().register_query(
+            "public".to_string(),
+            vec!["test".to_string()],
+            "SELECT * FROM table".to_string(),
+            "client1".to_string(),
+            None,
+        );
+
+        assert!(!ticket.cancellation_handle.is_cancelled());
+        ticket.cancellation_handle.cancel();
+        assert!(ticket.cancellation_handle.is_cancelled());
+    }
+
+    #[tokio::test]
+    async fn test_kill_local_process() {
+        let process_manager = Arc::new(ProcessManager::new("127.0.0.1:8000".to_string(), None));
+
+        let ticket = process_manager.clone().register_query(
+            "public".to_string(),
+            vec!["test".to_string()],
+            "SELECT * FROM table".to_string(),
+            "client1".to_string(),
+            None,
+        );
+        assert!(!ticket.cancellation_handle.is_cancelled());
+        let killed = process_manager
+            .kill_process(
+                "127.0.0.1:8000".to_string(),
+                "public".to_string(),
+                ticket.id,
+            )
+            .await
+            .unwrap();
+
+        assert!(killed);
+        assert_eq!(process_manager.local_processes(None).unwrap().len(), 0);
+    }
+
+    #[tokio::test]
+    async fn test_kill_nonexistent_process() {
+        let process_manager = Arc::new(ProcessManager::new("127.0.0.1:8000".to_string(), None));
+        let killed = process_manager
+            .kill_process("127.0.0.1:8000".to_string(), "public".to_string(), 999)
+            .await
+            .unwrap();
+        assert!(!killed);
+    }
+
+    #[tokio::test]
+    async fn test_kill_process_nonexistent_catalog() {
+        let process_manager = Arc::new(ProcessManager::new("127.0.0.1:8000".to_string(), None));
+        let killed = process_manager
+            .kill_process("127.0.0.1:8000".to_string(), "nonexistent".to_string(), 1)
+            .await
+            .unwrap();
+        assert!(!killed);
+    }
+
+    #[tokio::test]
+    async fn test_process_info_fields() {
+        let process_manager = Arc::new(ProcessManager::new("127.0.0.1:8000".to_string(), None));
+
+        let _ticket = process_manager.clone().register_query(
+            "test_catalog".to_string(),
+            vec!["schema1".to_string(), "schema2".to_string()],
+            "SELECT COUNT(*) FROM users WHERE age > 18".to_string(),
+            "test_client".to_string(),
+            Some(42),
+        );
+
+        let processes = process_manager.local_processes(None).unwrap();
+        assert_eq!(processes.len(), 1);
+
+        let process = &processes[0];
+        assert_eq!(process.id, 42);
+        assert_eq!(&process.catalog, "test_catalog");
+        assert_eq!(process.schemas, vec!["schema1", "schema2"]);
+        assert_eq!(&process.query, "SELECT COUNT(*) FROM users WHERE age > 18");
+        assert_eq!(&process.client, "test_client");
+        assert_eq!(&process.frontend, "127.0.0.1:8000");
+        assert!(process.start_timestamp > 0);
+    }
+
+    #[tokio::test]
+    async fn test_ticket_drop_deregisters_process() {
+        let process_manager = Arc::new(ProcessManager::new("127.0.0.1:8000".to_string(), None));
+
+        {
+            let _ticket = process_manager.clone().register_query(
+                "public".to_string(),
+                vec!["test".to_string()],
+                "SELECT * FROM table".to_string(),
+                "client1".to_string(),
+                None,
+            );
+
+            // Process should be registered
+            assert_eq!(process_manager.local_processes(None).unwrap().len(), 1);
+        } // ticket goes out of scope here
+
+        // Process should be automatically deregistered
+        assert_eq!(process_manager.local_processes(None).unwrap().len(), 0);
+    }
+}
--- a/src/catalog/src/system_schema/information_schema.rs
+++ b/src/catalog/src/system_schema/information_schema.rs
@@ -19,6 +19,7 @@ mod information_memory_table;
 pub mod key_column_usage;
 mod partitions;
 mod procedure_info;
+pub mod process_list;
 pub mod region_peers;
 mod region_statistics;
 mod runtime_metrics;
@@ -42,6 +43,7 @@ use common_recordbatch::SendableRecordBatchStream;
 use datatypes::schema::SchemaRef;
 use lazy_static::lazy_static;
 use paste::paste;
+use process_list::InformationSchemaProcessList;
 use store_api::storage::{ScanRequest, TableId};
 use table::metadata::TableType;
 use table::TableRef;
@@ -50,6 +52,7 @@ use views::InformationSchemaViews;

 use self::columns::InformationSchemaColumns;
 use crate::error::{Error, Result};
+use crate::process_manager::ProcessManagerRef;
 use crate::system_schema::information_schema::cluster_info::InformationSchemaClusterInfo;
 use crate::system_schema::information_schema::flows::InformationSchemaFlows;
 use crate::system_schema::information_schema::information_memory_table::get_schema_columns;
@@ -113,6 +116,7 @@ macro_rules! setup_memory_table {
 pub struct InformationSchemaProvider {
    catalog_name: String,
    catalog_manager: Weak<dyn CatalogManager>,
+    process_manager: Option<ProcessManagerRef>,
    flow_metadata_manager: Arc<FlowMetadataManager>,
    tables: HashMap<String, TableRef>,
 }
@@ -207,6 +211,10 @@ impl SystemSchemaProviderInner for InformationSchemaProvider {
                    self.catalog_manager.clone(),
                ),
            ) as _),
+            PROCESS_LIST => self
+                .process_manager
+                .as_ref()
+                .map(|p| Arc::new(InformationSchemaProcessList::new(p.clone())) as _),
            _ => None,
        }
    }
@@ -217,11 +225,13 @@ impl InformationSchemaProvider {
        catalog_name: String,
        catalog_manager: Weak<dyn CatalogManager>,
        flow_metadata_manager: Arc<FlowMetadataManager>,
+        process_manager: Option<ProcessManagerRef>,
    ) -> Self {
        let mut provider = Self {
            catalog_name,
            catalog_manager,
            flow_metadata_manager,
+            process_manager,
            tables: HashMap::new(),
        };

@@ -277,6 +287,9 @@ impl InformationSchemaProvider {
            self.build_table(TABLE_CONSTRAINTS).unwrap(),
        );
        tables.insert(FLOWS.to_string(), self.build_table(FLOWS).unwrap());
+        if let Some(process_list) = self.build_table(PROCESS_LIST) {
+            tables.insert(PROCESS_LIST.to_string(), process_list);
+        }
        // Add memory tables
        for name in MEMORY_TABLES.iter() {
            tables.insert((*name).to_string(), self.build_table(name).expect(name));
--- a/src/catalog/src/system_schema/information_schema/process_list.rs
+++ b/src/catalog/src/system_schema/information_schema/process_list.rs
@@ -0,0 +1,189 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use common_catalog::consts::INFORMATION_SCHEMA_PROCESS_LIST_TABLE_ID;
+use common_error::ext::BoxedError;
+use common_frontend::DisplayProcessId;
+use common_recordbatch::adapter::RecordBatchStreamAdapter;
+use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
+use common_time::util::current_time_millis;
+use common_time::{Duration, Timestamp};
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
+use datatypes::prelude::ConcreteDataType as CDT;
+use datatypes::scalars::ScalarVectorBuilder;
+use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
+use datatypes::value::Value;
+use datatypes::vectors::{
+    DurationMillisecondVectorBuilder, StringVectorBuilder, TimestampMillisecondVectorBuilder,
+    VectorRef,
+};
+use snafu::ResultExt;
+use store_api::storage::{ScanRequest, TableId};
+
+use crate::error::{self, InternalSnafu};
+use crate::information_schema::Predicates;
+use crate::process_manager::ProcessManagerRef;
+use crate::system_schema::information_schema::InformationTable;
+
+/// Column names of `information_schema.process_list`
+pub const ID: &str = "id";
+pub const CATALOG: &str = "catalog";
+pub const SCHEMAS: &str = "schemas";
+pub const QUERY: &str = "query";
+pub const CLIENT: &str = "client";
+pub const FRONTEND: &str = "frontend";
+pub const START_TIMESTAMP: &str = "start_timestamp";
+pub const ELAPSED_TIME: &str = "elapsed_time";
+
+/// `information_schema.process_list` table implementation that lists the queries
+/// currently running in the cluster.
+pub struct InformationSchemaProcessList {
+    schema: SchemaRef,
+    process_manager: ProcessManagerRef,
+}
+
+impl InformationSchemaProcessList {
+    pub fn new(process_manager: ProcessManagerRef) -> Self {
+        Self {
+            schema: Self::schema(),
+            process_manager,
+        }
+    }
+
+    fn schema() -> SchemaRef {
+        Arc::new(Schema::new(vec![
+            ColumnSchema::new(ID, CDT::string_datatype(), false),
+            ColumnSchema::new(CATALOG, CDT::string_datatype(), false),
+            ColumnSchema::new(SCHEMAS, CDT::string_datatype(), false),
+            ColumnSchema::new(QUERY, CDT::string_datatype(), false),
+            ColumnSchema::new(CLIENT, CDT::string_datatype(), false),
+            ColumnSchema::new(FRONTEND, CDT::string_datatype(), false),
+            ColumnSchema::new(
+                START_TIMESTAMP,
+                CDT::timestamp_millisecond_datatype(),
+                false,
+            ),
+            ColumnSchema::new(ELAPSED_TIME, CDT::duration_millisecond_datatype(), false),
+        ]))
+    }
+}
+
+impl InformationTable for InformationSchemaProcessList {
+    fn table_id(&self) -> TableId {
+        INFORMATION_SCHEMA_PROCESS_LIST_TABLE_ID
+    }
+
+    fn table_name(&self) -> &'static str {
+        "process_list"
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    fn to_stream(&self, request: ScanRequest) -> error::Result<SendableRecordBatchStream> {
+        let process_manager = self.process_manager.clone();
+        let stream = Box::pin(DfRecordBatchStreamAdapter::new(
+            self.schema.arrow_schema().clone(),
+            futures::stream::once(async move {
+                make_process_list(process_manager, request)
+                    .await
+                    .map(RecordBatch::into_df_record_batch)
+                    .map_err(|e| datafusion::error::DataFusionError::External(Box::new(e)))
+            }),
+        ));
+
+        Ok(Box::pin(
+            RecordBatchStreamAdapter::try_new(stream)
+                .map_err(BoxedError::new)
+                .context(InternalSnafu)?,
+        ))
+    }
+}
+
+/// Builds a record batch of the running processes that satisfy the scan request's predicates.
+async fn make_process_list(
+    process_manager: ProcessManagerRef,
+    request: ScanRequest,
+) -> error::Result<RecordBatch> {
+    let predicates = Predicates::from_scan_request(&Some(request));
+    // todo(hl): find a way to extract user catalog to filter queries from other users.
+    let queries = process_manager.list_all_processes(None).await?;
+    // Read the clock only after listing, so queries registered meanwhile are not "in the future".
+    let current_time = current_time_millis();
+
+    let mut id_builder = StringVectorBuilder::with_capacity(queries.len());
+    let mut catalog_builder = StringVectorBuilder::with_capacity(queries.len());
+    let mut schemas_builder = StringVectorBuilder::with_capacity(queries.len());
+    let mut query_builder = StringVectorBuilder::with_capacity(queries.len());
+    let mut client_builder = StringVectorBuilder::with_capacity(queries.len());
+    let mut frontend_builder = StringVectorBuilder::with_capacity(queries.len());
+    let mut start_time_builder = TimestampMillisecondVectorBuilder::with_capacity(queries.len());
+    let mut elapsed_time_builder = DurationMillisecondVectorBuilder::with_capacity(queries.len());
+
+    for process in queries {
+        let display_id = DisplayProcessId {
+            server_addr: process.frontend.to_string(),
+            id: process.id,
+        }
+        .to_string();
+        let id = Value::from(display_id);
+        let catalog = Value::from(process.catalog);
+        let schemas = Value::from(process.schemas.join(","));
+        let query = Value::from(process.query);
+        let client = Value::from(process.client);
+        let frontend = Value::from(process.frontend);
+        let start_timestamp = Value::from(Timestamp::new_millisecond(process.start_timestamp));
+        // Clamp at zero: the clock of a remote frontend may run ahead of the local one.
+        let elapsed_ms = current_time.saturating_sub(process.start_timestamp).max(0);
+        let elapsed_time = Value::from(Duration::new_millisecond(elapsed_ms));
+        let row = [
+            (ID, &id),
+            (CATALOG, &catalog),
+            (SCHEMAS, &schemas),
+            (QUERY, &query),
+            (CLIENT, &client),
+            (FRONTEND, &frontend),
+            (START_TIMESTAMP, &start_timestamp),
+            (ELAPSED_TIME, &elapsed_time),
+        ];
+        if predicates.eval(&row) {
+            id_builder.push(id.as_string().as_deref());
+            catalog_builder.push(catalog.as_string().as_deref());
+            schemas_builder.push(schemas.as_string().as_deref());
+            query_builder.push(query.as_string().as_deref());
+            client_builder.push(client.as_string().as_deref());
+            frontend_builder.push(frontend.as_string().as_deref());
+            start_time_builder.push(start_timestamp.as_timestamp().map(|t| t.value().into()));
+            elapsed_time_builder.push(elapsed_time.as_duration().map(|d| d.value().into()));
+        }
+    }
+
+    RecordBatch::new(
+        InformationSchemaProcessList::schema(),
+        vec![
+            Arc::new(id_builder.finish()) as VectorRef,
+            Arc::new(catalog_builder.finish()) as VectorRef,
+            Arc::new(schemas_builder.finish()) as VectorRef,
+            Arc::new(query_builder.finish()) as VectorRef,
+            Arc::new(client_builder.finish()) as VectorRef,
+            Arc::new(frontend_builder.finish()) as VectorRef,
+            Arc::new(start_time_builder.finish()) as VectorRef,
+            Arc::new(elapsed_time_builder.finish()) as VectorRef,
+        ],
+    )
+    .context(error::CreateRecordBatchSnafu)
+}
--- a/src/catalog/src/system_schema/information_schema/table_names.rs
+++ b/src/catalog/src/system_schema/information_schema/table_names.rs
@@ -47,3 +47,4 @@ pub const VIEWS: &str = "views";
 pub const FLOWS: &str = "flows";
 pub const PROCEDURE_INFO: &str = "procedure_info";
 pub const REGION_STATISTICS: &str = "region_statistics";
+pub const PROCESS_LIST: &str = "process_list";
--- a/src/catalog/src/table_source.rs
+++ b/src/catalog/src/table_source.rs
@@ -328,6 +328,7 @@ mod tests {
            backend.clone(),
            layered_cache_registry,
            None,
+            None,
        );
        let table_metadata_manager = TableMetadataManager::new(backend);
        let mut view_info = common_meta::key::test_utils::new_test_table_info(1024, vec![]);
--- a/src/cli/Cargo.toml
+++ b/src/cli/Cargo.toml
@@ -5,13 +5,18 @@ edition.workspace = true
 license.workspace = true

 [features]
-pg_kvbackend = ["common-meta/pg_kvbackend"]
-mysql_kvbackend = ["common-meta/mysql_kvbackend"]
+default = [
+    "pg_kvbackend",
+    "mysql_kvbackend",
+]
+pg_kvbackend = ["common-meta/pg_kvbackend", "meta-srv/pg_kvbackend"]
+mysql_kvbackend = ["common-meta/mysql_kvbackend", "meta-srv/mysql_kvbackend"]

 [lints]
 workspace = true

 [dependencies]
+async-stream.workspace = true
 async-trait.workspace = true
 auth.workspace = true
 base64.workspace = true
@@ -46,6 +51,7 @@ meta-client.workspace = true
 meta-srv.workspace = true
 nu-ansi-term = "0.46"
 object-store.workspace = true
+operator.workspace = true
 query.workspace = true
 rand.workspace = true
 reqwest.workspace = true
@@ -61,6 +67,7 @@ tokio.workspace = true
 tracing-appender.workspace = true

 [dev-dependencies]
+common-meta = { workspace = true, features = ["testing"] }
 common-version.workspace = true
 serde.workspace = true
 tempfile.workspace = true
--- a/src/cli/src/bench.rs
+++ b/src/cli/src/bench.rs
@@ -58,6 +58,7 @@ where
    info!("{desc}, average operation cost: {cost:.2} ms");
 }

+/// Command to benchmark table metadata operations.
 #[derive(Debug, Default, Parser)]
 pub struct BenchTableMetadataCommand {
    #[clap(long)]
--- a/src/cli/src/data.rs
+++ b/src/cli/src/data.rs
@@ -0,0 +1,39 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod export;
+mod import;
+
+use clap::Subcommand;
+use common_error::ext::BoxedError;
+
+use crate::data::export::ExportCommand;
+use crate::data::import::ImportCommand;
+use crate::Tool;
+
+/// Command for data operations including exporting data from and importing data into GreptimeDB.
+#[derive(Subcommand)]
+pub enum DataCommand {
+    Export(ExportCommand),
+    Import(ImportCommand),
+}
+
+impl DataCommand {
+    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
+        match self {
+            DataCommand::Export(cmd) => cmd.build().await,
+            DataCommand::Import(cmd) => cmd.build().await,
+        }
+    }
+}
--- a/src/cli/src/data/export.rs
+++ b/src/cli/src/data/export.rs
@@ -19,9 +19,11 @@ use std::time::Duration;

 use async_trait::async_trait;
 use clap::{Parser, ValueEnum};
+use common_base::secrets::{ExposeSecret, SecretString};
 use common_error::ext::BoxedError;
 use common_telemetry::{debug, error, info};
 use object_store::layers::LoggingLayer;
+use object_store::services::Oss;
 use object_store::{services, ObjectStore};
 use serde_json::Value;
 use snafu::{OptionExt, ResultExt};
@@ -48,6 +50,7 @@ enum ExportTarget {
    All,
 }

+/// Command for exporting data from the GreptimeDB.
 #[derive(Debug, Default, Parser)]
 pub struct ExportCommand {
    /// Server address to connect
@@ -110,15 +113,15 @@ pub struct ExportCommand {
    #[clap(long)]
    s3: bool,

-    /// if both `s3_ddl_local_dir` and `s3` are set, `s3_ddl_local_dir` will be only used for
-    /// exported SQL files, and the data will be exported to s3.
+    /// if both `ddl_local_dir` and remote storage (s3/oss) are set, `ddl_local_dir` will be only used for
+    /// exported SQL files, and the data will be exported to remote storage.
    ///
-    /// Note that `s3_ddl_local_dir` export sql files to **LOCAL** file system, this is useful if export client don't have
-    /// direct access to s3.
+    /// Note that `ddl_local_dir` exports SQL files to the **LOCAL** file system; this is useful if the export client
+    /// doesn't have direct access to remote storage.
    ///
-    /// if `s3` is set but `s3_ddl_local_dir` is not set, both SQL&data will be exported to s3.
+    /// if remote storage is set but `ddl_local_dir` is not set, both SQL&data will be exported to remote storage.
    #[clap(long)]
-    s3_ddl_local_dir: Option<String>,
+    ddl_local_dir: Option<String>,

    /// The s3 bucket name
    /// if s3 is set, this is required
@@ -149,6 +152,30 @@ pub struct ExportCommand {
    /// if s3 is set, this is required
    #[clap(long)]
    s3_region: Option<String>,
+
+    /// if export data to oss
+    #[clap(long)]
+    oss: bool,
+
+    /// The oss bucket name
+    /// if oss is set, this is required
+    #[clap(long)]
+    oss_bucket: Option<String>,
+
+    /// The oss endpoint
+    /// if oss is set, this is required
+    #[clap(long)]
+    oss_endpoint: Option<String>,
+
+    /// The oss access key id
+    /// if oss is set, this is required
+    #[clap(long)]
+    oss_access_key_id: Option<String>,
+
+    /// The oss access key secret
+    /// if oss is set, this is required
+    #[clap(long)]
+    oss_access_key_secret: Option<String>,
 }

 impl ExportCommand {
@@ -162,7 +189,7 @@ impl ExportCommand {
        {
            return Err(BoxedError::new(S3ConfigNotSetSnafu {}.build()));
        }
-        if !self.s3 && self.output_dir.is_none() {
+        if !self.s3 && !self.oss && self.output_dir.is_none() {
            return Err(BoxedError::new(OutputDirNotSetSnafu {}.build()));
        }
        let (catalog, schema) =
@@ -187,13 +214,32 @@ impl ExportCommand {
            start_time: self.start_time.clone(),
            end_time: self.end_time.clone(),
            s3: self.s3,
-            s3_ddl_local_dir: self.s3_ddl_local_dir.clone(),
+            ddl_local_dir: self.ddl_local_dir.clone(),
            s3_bucket: self.s3_bucket.clone(),
            s3_root: self.s3_root.clone(),
            s3_endpoint: self.s3_endpoint.clone(),
-            s3_access_key: self.s3_access_key.clone(),
-            s3_secret_key: self.s3_secret_key.clone(),
+            // Wrap sensitive values in SecretString
+            s3_access_key: self
+                .s3_access_key
+                .as_ref()
+                .map(|k| SecretString::from(k.clone())),
+            s3_secret_key: self
+                .s3_secret_key
+                .as_ref()
+                .map(|k| SecretString::from(k.clone())),
            s3_region: self.s3_region.clone(),
+            oss: self.oss,
+            oss_bucket: self.oss_bucket.clone(),
+            oss_endpoint: self.oss_endpoint.clone(),
+            // Wrap sensitive values in SecretString
+            oss_access_key_id: self
+                .oss_access_key_id
+                .as_ref()
+                .map(|k| SecretString::from(k.clone())),
+            oss_access_key_secret: self
+                .oss_access_key_secret
+                .as_ref()
+                .map(|k| SecretString::from(k.clone())),
        }))
    }
 }
@@ -209,23 +255,30 @@ pub struct Export {
    start_time: Option<String>,
    end_time: Option<String>,
    s3: bool,
-    s3_ddl_local_dir: Option<String>,
+    ddl_local_dir: Option<String>,
    s3_bucket: Option<String>,
    s3_root: Option<String>,
    s3_endpoint: Option<String>,
-    s3_access_key: Option<String>,
-    s3_secret_key: Option<String>,
+    // Sensitive values are wrapped in SecretString.
+    s3_access_key: Option<SecretString>,
+    s3_secret_key: Option<SecretString>,
    s3_region: Option<String>,
+    oss: bool,
+    oss_bucket: Option<String>,
+    oss_endpoint: Option<String>,
+    // Sensitive values are wrapped in SecretString.
+    oss_access_key_id: Option<SecretString>,
+    oss_access_key_secret: Option<SecretString>,
 }

 impl Export {
    fn catalog_path(&self) -> PathBuf {
-        if self.s3 {
+        if self.s3 || self.oss {
            PathBuf::from(&self.catalog)
        } else if let Some(dir) = &self.output_dir {
            PathBuf::from(dir).join(&self.catalog)
        } else {
-            unreachable!("catalog_path: output_dir must be set when not using s3")
+            unreachable!("catalog_path: output_dir must be set when not using remote storage")
        }
    }

@@ -427,7 +480,7 @@ impl Export {
                    .await?;

                // Create directory if needed for file system storage
-                if !export_self.s3 {
+                if !export_self.s3 && !export_self.oss {
                    let db_dir = format!("{}/{}/", export_self.catalog, schema);
                    operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
                }
@@ -473,6 +526,8 @@ impl Export {
    async fn build_operator(&self) -> Result<ObjectStore> {
        if self.s3 {
            self.build_s3_operator().await
+        } else if self.oss {
+            self.build_oss_operator().await
        } else {
            self.build_fs_operator().await
        }
@@ -480,9 +535,8 @@ impl Export {

    /// build operator with preference for file system
    async fn build_prefer_fs_operator(&self) -> Result<ObjectStore> {
-        // is under s3 mode and s3_ddl_dir is set, use it as root
-        if self.s3 && self.s3_ddl_local_dir.is_some() {
-            let root = self.s3_ddl_local_dir.as_ref().unwrap().clone();
+        if (self.s3 || self.oss) && self.ddl_local_dir.is_some() {
+            let root = self.ddl_local_dir.as_ref().unwrap().clone();
            let op = ObjectStore::new(services::Fs::default().root(&root))
                .context(OpenDalSnafu)?
                .layer(LoggingLayer::default())
@@ -490,6 +544,8 @@ impl Export {
            Ok(op)
        } else if self.s3 {
            self.build_s3_operator().await
+        } else if self.oss {
+            self.build_oss_operator().await
        } else {
            self.build_fs_operator().await
        }
@@ -515,11 +571,35 @@ impl Export {
        }

        if let Some(key_id) = self.s3_access_key.as_ref() {
-            builder = builder.access_key_id(key_id);
+            builder = builder.access_key_id(key_id.expose_secret());
        }

        if let Some(secret_key) = self.s3_secret_key.as_ref() {
-            builder = builder.secret_access_key(secret_key);
+            builder = builder.secret_access_key(secret_key.expose_secret());
+        }
+
+        let op = ObjectStore::new(builder)
+            .context(OpenDalSnafu)?
+            .layer(LoggingLayer::default())
+            .finish();
+        Ok(op)
+    }
+
+    async fn build_oss_operator(&self) -> Result<ObjectStore> {
+        let mut builder = Oss::default()
+            .bucket(self.oss_bucket.as_ref().expect("oss_bucket must be set"))
+            .endpoint(
+                self.oss_endpoint
+                    .as_ref()
+                    .expect("oss_endpoint must be set"),
+            );
+
+        // Use expose_secret() to access the actual secret value
+        if let Some(key_id) = self.oss_access_key_id.as_ref() {
+            builder = builder.access_key_id(key_id.expose_secret());
+        }
+        if let Some(secret_key) = self.oss_access_key_secret.as_ref() {
+            builder = builder.access_key_secret(secret_key.expose_secret());
        }

        let op = ObjectStore::new(builder)
@@ -562,8 +642,8 @@ impl Export {
            tasks.push(async move {
                let _permit = semaphore_moved.acquire().await.unwrap();

-                // Create directory if not using S3
-                if !export_self.s3 {
+                // Create directory if not using remote storage
+                if !export_self.s3 && !export_self.oss {
                    let db_dir = format!("{}/{}/", export_self.catalog, schema);
                    operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
                }
@@ -575,7 +655,11 @@ impl Export {
                    r#"COPY DATABASE "{}"."{}" TO '{}' WITH ({}){};"#,
                    export_self.catalog, schema, path, with_options_clone, connection_part
                );
-                info!("Executing sql: {sql}");
+
+                // Log SQL command but mask sensitive information
+                let safe_sql = export_self.mask_sensitive_sql(&sql);
+                info!("Executing sql: {}", safe_sql);
+
                export_self.database_client.sql_in_public(&sql).await?;
                info!(
                    "Finished exporting {}.{} data to {}",
@@ -615,6 +699,29 @@ impl Export {
        Ok(())
    }

+    /// Mask sensitive information in SQL commands for safe logging.
+    ///
+    /// Every configured S3/OSS credential that occurs in `sql` is replaced
+    /// with `[REDACTED]`. Empty credentials are skipped: `str::replace` with
+    /// an empty pattern matches at every character boundary and would
+    /// scatter `[REDACTED]` markers through the whole statement.
+    fn mask_sensitive_sql(&self, sql: &str) -> String {
+        let secrets = [
+            &self.s3_access_key,
+            &self.s3_secret_key,
+            &self.oss_access_key_id,
+            &self.oss_access_key_secret,
+        ];
+        let mut masked_sql = sql.to_string();
+        for secret in secrets.iter().copied().flatten() {
+            let secret = secret.expose_secret();
+            if !secret.is_empty() {
+                masked_sql = masked_sql.replace(secret, "[REDACTED]");
+            }
+        }
+        masked_sql
+    }
+
    fn get_file_path(&self, schema: &str, file_name: &str) -> String {
        format!("{}/{}/{}", self.catalog, schema, file_name)
    }
@@ -631,6 +738,13 @@ impl Export {
                },
                file_path
            )
+        } else if self.oss {
+            format!(
+                "oss://{}/{}/{}",
+                self.oss_bucket.as_ref().unwrap_or(&String::new()),
+                self.catalog,
+                file_path
+            )
        } else {
            format!(
                "{}/{}",
@@ -675,15 +789,36 @@ impl Export {
            };

            // Safety: All s3 options are required
+            // Use expose_secret() to access the actual secret values
            let connection_options = format!(
                "ACCESS_KEY_ID='{}', SECRET_ACCESS_KEY='{}', REGION='{}'{}",
-                self.s3_access_key.as_ref().unwrap(),
-                self.s3_secret_key.as_ref().unwrap(),
+                self.s3_access_key.as_ref().unwrap().expose_secret(),
+                self.s3_secret_key.as_ref().unwrap().expose_secret(),
                self.s3_region.as_ref().unwrap(),
                endpoint_option
            );

            (s3_path, format!(" CONNECTION ({})", connection_options))
+        } else if self.oss {
+            let oss_path = format!(
+                "oss://{}/{}/{}/",
+                self.oss_bucket.as_ref().unwrap(),
+                self.catalog,
+                schema
+            );
+            let endpoint_option = if let Some(endpoint) = self.oss_endpoint.as_ref() {
+                format!(", ENDPOINT='{}'", endpoint)
+            } else {
+                String::new()
+            };
+
+            let connection_options = format!(
+                "ACCESS_KEY_ID='{}', ACCESS_KEY_SECRET='{}'{}",
+                self.oss_access_key_id.as_ref().unwrap().expose_secret(),
+                self.oss_access_key_secret.as_ref().unwrap().expose_secret(),
+                endpoint_option
+            );
+            (oss_path, format!(" CONNECTION ({})", connection_options))
        } else {
            (
                self.catalog_path()
--- a/src/cli/src/data/import.rs
+++ b/src/cli/src/data/import.rs
@@ -40,6 +40,7 @@ enum ImportTarget {
    All,
 }

+/// Command to import data from a directory into a GreptimeDB instance.
 #[derive(Debug, Default, Parser)]
 pub struct ImportCommand {
    /// Server address to connect
--- a/src/cli/src/error.rs
+++ b/src/cli/src/error.rs
@@ -17,8 +17,10 @@ use std::any::Any;
 use common_error::ext::{BoxedError, ErrorExt};
 use common_error::status_code::StatusCode;
 use common_macro::stack_trace_debug;
+use common_meta::peer::Peer;
 use object_store::Error as ObjectStoreError;
 use snafu::{Location, Snafu};
+use store_api::storage::TableId;

 #[derive(Snafu)]
 #[snafu(visibility(pub))]
@@ -30,6 +32,7 @@ pub enum Error {
        location: Location,
        msg: String,
    },
+
    #[snafu(display("Failed to create default catalog and schema"))]
    InitMetadata {
        #[snafu(implicit)]
@@ -72,6 +75,20 @@ pub enum Error {
        source: common_meta::error::Error,
    },

+    #[snafu(display("Failed to get table metadata"))]
+    TableMetadata {
+        #[snafu(implicit)]
+        location: Location,
+        source: common_meta::error::Error,
+    },
+
+    #[snafu(display("Unexpected error: {}", msg))]
+    Unexpected {
+        msg: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
    #[snafu(display("Missing config, msg: {}", msg))]
    MissingConfig {
        msg: String,
@@ -221,6 +238,13 @@ pub enum Error {
        location: Location,
    },

+    #[snafu(display("Table not found: {table_id}"))]
+    TableNotFound {
+        table_id: TableId,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
    #[snafu(display("OpenDAL operator failed"))]
    OpenDal {
        #[snafu(implicit)]
@@ -228,22 +252,67 @@ pub enum Error {
        #[snafu(source)]
        error: ObjectStoreError,
    },
+
    #[snafu(display("S3 config need be set"))]
    S3ConfigNotSet {
        #[snafu(implicit)]
        location: Location,
    },
+
    #[snafu(display("Output directory not set"))]
    OutputDirNotSet {
        #[snafu(implicit)]
        location: Location,
    },
-    #[snafu(display("KV backend not set: {}", backend))]
-    KvBackendNotSet {
-        backend: String,
+
+    #[snafu(display("Empty store addresses"))]
+    EmptyStoreAddrs {
        #[snafu(implicit)]
        location: Location,
    },
+
+    #[snafu(display("Unsupported memory backend"))]
+    UnsupportedMemoryBackend {
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("File path invalid: {}", msg))]
+    InvalidFilePath {
+        msg: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Invalid arguments: {}", msg))]
+    InvalidArguments {
+        msg: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Failed to init backend"))]
+    InitBackend {
+        #[snafu(implicit)]
+        location: Location,
+        #[snafu(source)]
+        error: ObjectStoreError,
+    },
+
+    #[snafu(display("Covert column schemas to defs failed"))]
+    CovertColumnSchemasToDefs {
+        #[snafu(implicit)]
+        location: Location,
+        source: operator::error::Error,
+    },
+
+    #[snafu(display("Failed to send request to datanode: {}", peer))]
+    SendRequestToDatanode {
+        peer: Peer,
+        #[snafu(implicit)]
+        location: Location,
+        source: common_meta::error::Error,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -251,9 +320,9 @@ pub type Result<T> = std::result::Result<T, Error>;
 impl ErrorExt for Error {
    fn status_code(&self) -> StatusCode {
        match self {
-            Error::InitMetadata { source, .. } | Error::InitDdlManager { source, .. } => {
-                source.status_code()
-            }
+            Error::InitMetadata { source, .. }
+            | Error::InitDdlManager { source, .. }
+            | Error::TableMetadata { source, .. } => source.status_code(),

            Error::MissingConfig { .. }
            | Error::LoadLayeredConfig { .. }
@@ -262,8 +331,14 @@ impl ErrorExt for Error {
            | Error::ConnectEtcd { .. }
            | Error::CreateDir { .. }
            | Error::EmptyResult { .. }
+            | Error::InvalidFilePath { .. }
+            | Error::UnsupportedMemoryBackend { .. }
+            | Error::InvalidArguments { .. }
            | Error::ParseProxyOpts { .. } => StatusCode::InvalidArguments,

+            Error::CovertColumnSchemasToDefs { source, .. } => source.status_code(),
+            Error::SendRequestToDatanode { source, .. } => source.status_code(),
+
            Error::StartProcedureManager { source, .. }
            | Error::StopProcedureManager { source, .. } => source.status_code(),
            Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
@@ -271,6 +346,7 @@ impl ErrorExt for Error {
            Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => {
                source.status_code()
            }
+            Error::Unexpected { .. } => StatusCode::Unexpected,

            Error::SerdeJson { .. }
            | Error::FileIo { .. }
@@ -279,15 +355,16 @@ impl ErrorExt for Error {
            | Error::BuildClient { .. } => StatusCode::Unexpected,

            Error::Other { source, .. } => source.status_code(),
-            Error::OpenDal { .. } => StatusCode::Internal,
+            Error::OpenDal { .. } | Error::InitBackend { .. } => StatusCode::Internal,
            Error::S3ConfigNotSet { .. }
            | Error::OutputDirNotSet { .. }
-            | Error::KvBackendNotSet { .. } => StatusCode::InvalidArguments,
+            | Error::EmptyStoreAddrs { .. } => StatusCode::InvalidArguments,

            Error::BuildRuntime { source, .. } => source.status_code(),

            Error::CacheRequired { .. } | Error::BuildCacheRegistry { .. } => StatusCode::Internal,
            Error::MetaClientInit { source, .. } => source.status_code(),
+            Error::TableNotFound { .. } => StatusCode::TableNotFound,
            Error::SchemaNotFound { .. } => StatusCode::DatabaseNotFound,
        }
    }
--- a/src/cli/src/lib.rs
+++ b/src/cli/src/lib.rs
@@ -13,11 +13,10 @@
 // limitations under the License.

 mod bench;
+mod data;
 mod database;
 pub mod error;
-mod export;
-mod import;
-mod meta_snapshot;
+mod metadata;

 use async_trait::async_trait;
 use clap::Parser;
@@ -26,9 +25,8 @@ pub use database::DatabaseClient;
 use error::Result;

 pub use crate::bench::BenchTableMetadataCommand;
-pub use crate::export::ExportCommand;
-pub use crate::import::ImportCommand;
-pub use crate::meta_snapshot::{MetaRestoreCommand, MetaSnapshotCommand};
+pub use crate::data::DataCommand;
+pub use crate::metadata::MetadataCommand;

 #[async_trait]
 pub trait Tool: Send + Sync {
--- a/src/cli/src/metadata.rs
+++ b/src/cli/src/metadata.rs
@@ -0,0 +1,52 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod common;
+mod control;
+mod repair;
+mod snapshot;
+mod utils;
+
+use clap::Subcommand;
+use common_error::ext::BoxedError;
+
+use crate::metadata::control::{DelCommand, GetCommand};
+use crate::metadata::repair::RepairLogicalTablesCommand;
+use crate::metadata::snapshot::SnapshotCommand;
+use crate::Tool;
+
+/// Command for managing metadata operations,
+/// including saving and restoring metadata snapshots,
+/// controlling metadata operations, and diagnosing and repairing metadata.
+#[derive(Subcommand)]
+pub enum MetadataCommand {
+    #[clap(subcommand)]
+    Snapshot(SnapshotCommand),
+    #[clap(subcommand)]
+    Get(GetCommand),
+    #[clap(subcommand)]
+    Del(DelCommand),
+    RepairLogicalTables(RepairLogicalTablesCommand),
+}
+
+impl MetadataCommand {
+    /// Builds the [`Tool`] for whichever subcommand was selected by
+    /// delegating to that subcommand's own `build`.
+    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
+        match self {
+            Self::Snapshot(command) => command.build().await,
+            Self::Get(command) => command.build().await,
+            Self::Del(command) => command.build().await,
+            Self::RepairLogicalTables(command) => command.build().await,
+        }
+    }
+}
--- a/src/cli/src/metadata/common.rs
+++ b/src/cli/src/metadata/common.rs
@@ -0,0 +1,116 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use clap::Parser;
+use common_error::ext::BoxedError;
+use common_meta::kv_backend::chroot::ChrootKvBackend;
+use common_meta::kv_backend::etcd::EtcdStore;
+use common_meta::kv_backend::KvBackendRef;
+use meta_srv::bootstrap::create_etcd_client;
+use meta_srv::metasrv::BackendImpl;
+
+use crate::error::{EmptyStoreAddrsSnafu, UnsupportedMemoryBackendSnafu};
+
+// Shared metadata-store CLI arguments. The `///` comments on the fields are
+// picked up by clap as help text, so they are kept verbatim.
+#[derive(Debug, Default, Parser)]
+pub(crate) struct StoreConfig {
+    /// The endpoint of store. one of etcd, postgres or mysql.
+    ///
+    /// For postgres store, the format is:
+    /// "password=password dbname=postgres user=postgres host=localhost port=5432"
+    ///
+    /// For etcd store, the format is:
+    /// "127.0.0.1:2379"
+    ///
+    /// For mysql store, the format is:
+    /// "mysql://user:password@ip:port/dbname"
+    #[clap(long, alias = "store-addr", value_delimiter = ',', num_args = 1..)]
+    store_addrs: Vec<String>,
+
+    /// The maximum number of operations in a transaction. Only used when using [etcd-store].
+    #[clap(long, default_value = "128")]
+    max_txn_ops: usize,
+
+    /// The metadata store backend.
+    #[clap(long, value_enum, default_value = "etcd-store")]
+    backend: BackendImpl,
+
+    /// The key prefix of the metadata store.
+    #[clap(long, default_value = "")]
+    store_key_prefix: String,
+
+    /// The table name in RDS to store metadata. Only used when using [postgres-store] or [mysql-store].
+    #[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
+    #[clap(long, default_value = common_meta::kv_backend::DEFAULT_META_TABLE_NAME)]
+    meta_table_name: String,
+}
+
+impl StoreConfig {
+    /// Builds a [`KvBackendRef`] from the store configuration.
+    ///
+    /// Fails with `EmptyStoreAddrs` when no store address was supplied and
+    /// with `UnsupportedMemoryBackend` when the memory backend is selected.
+    /// A non-empty `store_key_prefix` wraps the backend in a
+    /// [`ChrootKvBackend`] built from that prefix.
+    pub async fn build(&self) -> Result<KvBackendRef, BoxedError> {
+        let store_addrs = &self.store_addrs;
+        // Guard clause: nothing can be built without at least one address.
+        if store_addrs.is_empty() {
+            return EmptyStoreAddrsSnafu.fail().map_err(BoxedError::new);
+        }
+        let max_txn_ops = self.max_txn_ops;
+
+        let backend: KvBackendRef = match self.backend {
+            BackendImpl::EtcdStore => {
+                let client = create_etcd_client(store_addrs)
+                    .await
+                    .map_err(BoxedError::new)?;
+                EtcdStore::with_etcd_client(client, max_txn_ops)
+            }
+            #[cfg(feature = "pg_kvbackend")]
+            BackendImpl::PostgresStore => {
+                let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs)
+                    .await
+                    .map_err(BoxedError::new)?;
+                common_meta::kv_backend::rds::PgStore::with_pg_pool(
+                    pool,
+                    &self.meta_table_name,
+                    max_txn_ops,
+                )
+                .await
+                .map_err(BoxedError::new)?
+            }
+            #[cfg(feature = "mysql_kvbackend")]
+            BackendImpl::MysqlStore => {
+                let pool = meta_srv::bootstrap::create_mysql_pool(store_addrs)
+                    .await
+                    .map_err(BoxedError::new)?;
+                common_meta::kv_backend::rds::MySqlStore::with_mysql_pool(
+                    pool,
+                    &self.meta_table_name,
+                    max_txn_ops,
+                )
+                .await
+                .map_err(BoxedError::new)?
+            }
+            BackendImpl::MemoryStore => {
+                return UnsupportedMemoryBackendSnafu
+                    .fail()
+                    .map_err(BoxedError::new);
+            }
+        };
+
+        // Without a key prefix the backend is handed out as-is.
+        if self.store_key_prefix.is_empty() {
+            return Ok(backend);
+        }
+        let root = self.store_key_prefix.as_bytes().to_vec();
+        Ok(Arc::new(ChrootKvBackend::new(root, backend)))
+    }
+}
--- a/src/cli/src/metadata/control.rs
+++ b/src/cli/src/metadata/control.rs
@@ -12,11 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-mod geo_path;
-mod hll;
-mod uddsketch_state;
+mod del;
+mod get;
+#[cfg(test)]
+mod test_utils;
+mod utils;

-pub use geo_path::{GeoPathAccumulator, GEO_PATH_NAME};
-pub(crate) use hll::HllStateType;
-pub use hll::{HllState, HLL_MERGE_NAME, HLL_NAME};
-pub use uddsketch_state::{UddSketchState, UDDSKETCH_MERGE_NAME, UDDSKETCH_STATE_NAME};
+pub(crate) use del::DelCommand;
+pub(crate) use get::GetCommand;
--- a/src/cli/src/metadata/control/del.rs
+++ b/src/cli/src/metadata/control/del.rs
@@ -0,0 +1,42 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod key;
+mod table;
+
+use clap::Subcommand;
+use common_error::ext::BoxedError;
+
+use crate::metadata::control::del::key::DelKeyCommand;
+use crate::metadata::control::del::table::DelTableCommand;
+use crate::Tool;
+
+/// The prefix of the tombstone keys.
+pub(crate) const CLI_TOMBSTONE_PREFIX: &str = "__cli_tombstone/";
+
+/// Subcommand for deleting metadata from the metadata store.
+#[derive(Subcommand)]
+pub enum DelCommand {
+    Key(DelKeyCommand),
+    Table(DelTableCommand),
+}
+
+impl DelCommand {
+    /// Builds the [`Tool`] of the selected delete subcommand by delegating
+    /// to that subcommand's own `build`.
+    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
+        match self {
+            Self::Key(command) => command.build().await,
+            Self::Table(command) => command.build().await,
+        }
+    }
+}
--- a/src/cli/src/metadata/control/del/key.rs
+++ b/src/cli/src/metadata/control/del/key.rs
@@ -0,0 +1,132 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use async_trait::async_trait;
+use clap::Parser;
+use common_error::ext::BoxedError;
+use common_meta::key::tombstone::TombstoneManager;
+use common_meta::kv_backend::KvBackendRef;
+use common_meta::rpc::store::RangeRequest;
+
+use crate::metadata::common::StoreConfig;
+use crate::metadata::control::del::CLI_TOMBSTONE_PREFIX;
+use crate::Tool;
+
+/// Delete key-value pairs logically from the metadata store.
+#[derive(Debug, Default, Parser)]
+pub struct DelKeyCommand {
+    /// The key to delete from the metadata store.
+    key: String,
+
+    /// Delete key-value pairs with the given prefix.
+    #[clap(long)]
+    prefix: bool,
+
+    #[clap(flatten)]
+    store: StoreConfig,
+}
+
+impl DelKeyCommand {
+    /// Connects to the configured metadata store and returns the [`Tool`]
+    /// that deletes the requested key (or key prefix).
+    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
+        let key_deleter = KeyDeleter::new(self.store.build().await?);
+        let tool = DelKeyTool {
+            key: self.key.clone(),
+            prefix: self.prefix,
+            key_deleter,
+        };
+        Ok(Box::new(tool))
+    }
+}
+
+/// Looks up keys in the metadata store and hands them to a
+/// [`TombstoneManager`] configured with [`CLI_TOMBSTONE_PREFIX`].
+struct KeyDeleter {
+    kv_backend: KvBackendRef,
+    tombstone_manager: TombstoneManager,
+}
+
+impl KeyDeleter {
+    /// Creates a deleter whose lookups and tombstones share `kv_backend`.
+    fn new(kv_backend: KvBackendRef) -> Self {
+        let tombstone_manager =
+            TombstoneManager::new_with_prefix(kv_backend.clone(), CLI_TOMBSTONE_PREFIX);
+        Self {
+            kv_backend,
+            tombstone_manager,
+        }
+    }
+
+    /// Tombstones `key`, or every key starting with `key` when `prefix` is
+    /// set, and returns the count reported by the tombstone manager.
+    async fn delete(&self, key: &str, prefix: bool) -> Result<usize, BoxedError> {
+        // Only the key names are needed, so the values are not fetched.
+        let lookup = RangeRequest::default().with_keys_only();
+        let lookup = if prefix {
+            lookup.with_prefix(key.as_bytes())
+        } else {
+            lookup.with_key(key.as_bytes())
+        };
+        let found = self
+            .kv_backend
+            .range(lookup)
+            .await
+            .map_err(BoxedError::new)?;
+        let keys: Vec<_> = found.kvs.iter().map(|kv| kv.key.clone()).collect();
+        self.tombstone_manager
+            .create(keys)
+            .await
+            .map_err(BoxedError::new)
+    }
+}
+
+/// [`Tool`] that runs one `KeyDeleter::delete` call with the stored
+/// arguments.
+struct DelKeyTool {
+    key: String,
+    prefix: bool,
+    key_deleter: KeyDeleter,
+}
+
+#[async_trait]
+impl Tool for DelKeyTool {
+    /// Deletes the configured key(s) and prints how many were deleted.
+    async fn do_work(&self) -> Result<(), BoxedError> {
+        let deleted_count = self.key_deleter.delete(&self.key, self.prefix).await?;
+        // The bare number is the command's only output.
+        println!("{}", deleted_count);
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use common_meta::kv_backend::chroot::ChrootKvBackend;
+    use common_meta::kv_backend::memory::MemoryKvBackend;
+    use common_meta::kv_backend::{KvBackend, KvBackendRef};
+    use common_meta::rpc::store::RangeRequest;
+
+    use crate::metadata::control::del::key::KeyDeleter;
+    use crate::metadata::control::del::CLI_TOMBSTONE_PREFIX;
+    use crate::metadata::control::test_utils::put_key;
+
+    // Deletes three keys by prefix against an in-memory backend, then checks
+    // that they are no longer found at their original location and are
+    // readable, values intact, under the CLI tombstone prefix.
+    #[tokio::test]
+    async fn test_delete_keys() {
+        let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
+        let key_deleter = KeyDeleter::new(kv_backend.clone());
+        put_key(&kv_backend, "foo", "bar").await;
+        put_key(&kv_backend, "foo/bar", "baz").await;
+        put_key(&kv_backend, "foo/baz", "qux").await;
+        // A prefix delete of "foo" covers all three keys.
+        let deleted = key_deleter.delete("foo", true).await.unwrap();
+        assert_eq!(deleted, 3);
+        // An exact-key delete now finds nothing left to delete.
+        let deleted = key_deleter.delete("foo/bar", false).await.unwrap();
+        assert_eq!(deleted, 0);
+
+        // View the backend rooted at the tombstone prefix to inspect what was moved there.
+        let chroot = ChrootKvBackend::new(CLI_TOMBSTONE_PREFIX.as_bytes().to_vec(), kv_backend);
+        let req = RangeRequest::default().with_prefix(b"foo");
+        let resp = chroot.range(req).await.unwrap();
+        assert_eq!(resp.kvs.len(), 3);
+        assert_eq!(resp.kvs[0].key, b"foo");
+        assert_eq!(resp.kvs[0].value, b"bar");
+        assert_eq!(resp.kvs[1].key, b"foo/bar");
+        assert_eq!(resp.kvs[1].value, b"baz");
+        assert_eq!(resp.kvs[2].key, b"foo/baz");
+        assert_eq!(resp.kvs[2].value, b"qux");
+    }
+}
--- a/src/cli/src/metadata/control/del/table.rs
+++ b/src/cli/src/metadata/control/del/table.rs
@@ -0,0 +1,235 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use async_trait::async_trait;
+use clap::Parser;
+use client::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+use common_catalog::format_full_table_name;
+use common_error::ext::BoxedError;
+use common_meta::ddl::utils::get_region_wal_options;
+use common_meta::key::table_name::TableNameManager;
+use common_meta::key::TableMetadataManager;
+use common_meta::kv_backend::KvBackendRef;
+use store_api::storage::TableId;
+
+use crate::error::{InvalidArgumentsSnafu, TableNotFoundSnafu};
+use crate::metadata::common::StoreConfig;
+use crate::metadata::control::del::CLI_TOMBSTONE_PREFIX;
+use crate::metadata::control::utils::get_table_id_by_name;
+use crate::Tool;
+
+/// Delete table metadata logically from the metadata store.
+#[derive(Debug, Default, Parser)]
+pub struct DelTableCommand {
+    /// The table id to delete from the metadata store.
+    #[clap(long)]
+    table_id: Option<u32>,
+
+    /// The table name to delete from the metadata store.
+    #[clap(long)]
+    table_name: Option<String>,
+
+    /// The schema name of the table.
+    #[clap(long, default_value = DEFAULT_SCHEMA_NAME)]
+    schema_name: String,
+
+    /// The catalog name of the table.
+    #[clap(long, default_value = DEFAULT_CATALOG_NAME)]
+    catalog_name: String,
+
+    #[clap(flatten)]
+    store: StoreConfig,
+}
+
+impl DelTableCommand {
+    /// Ensures exactly one of `--table-id` / `--table-name` was supplied.
+    fn validate(&self) -> Result<(), BoxedError> {
+        // Both flags set, or both missing, is rejected.
+        if self.table_id.is_some() == self.table_name.is_some() {
+            let err = InvalidArgumentsSnafu {
+                msg: "You must specify either --table-id or --table-name.",
+            }
+            .build();
+            return Err(BoxedError::new(err));
+        }
+        Ok(())
+    }
+
+    /// Validates the arguments, connects to the metadata store and returns
+    /// the [`Tool`] that performs the deletion.
+    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
+        self.validate()?;
+        let kv_backend = self.store.build().await?;
+        let tool = DelTableTool {
+            table_id: self.table_id,
+            table_name: self.table_name.clone(),
+            schema_name: self.schema_name.clone(),
+            catalog_name: self.catalog_name.clone(),
+            table_name_manager: TableNameManager::new(kv_backend.clone()),
+            table_metadata_deleter: TableMetadataDeleter::new(kv_backend),
+        };
+        Ok(Box::new(tool))
+    }
+}
+
+/// [`Tool`] that deletes the metadata of one table, addressed either by id
+/// or by catalog/schema/table name.
+struct DelTableTool {
+    table_id: Option<u32>,
+    table_name: Option<String>,
+    schema_name: String,
+    catalog_name: String,
+    table_name_manager: TableNameManager,
+    table_metadata_deleter: TableMetadataDeleter,
+}
+
+#[async_trait]
+impl Tool for DelTableTool {
+    /// Resolves the table id (looking it up by name when a name was given)
+    /// and deletes that table's metadata. A name that resolves to nothing
+    /// is reported on stdout and treated as success.
+    async fn do_work(&self) -> Result<(), BoxedError> {
+        let table_id = match &self.table_name {
+            Some(table_name) => {
+                let catalog = &self.catalog_name;
+                let schema = &self.schema_name;
+                let lookup =
+                    get_table_id_by_name(&self.table_name_manager, catalog, schema, table_name)
+                        .await?;
+                match lookup {
+                    Some(id) => id,
+                    None => {
+                        println!(
+                            "Table({}) not found",
+                            format_full_table_name(catalog, schema, table_name)
+                        );
+                        return Ok(());
+                    }
+                }
+            }
+            // Safety: we have validated that table_id or table_name is not None
+            None => self.table_id.unwrap(),
+        };
+        self.table_metadata_deleter.delete(table_id).await?;
+        println!("Table({}) deleted", table_id);
+
+        Ok(())
+    }
+}
+
+/// Deletes table metadata through a [`TableMetadataManager`] constructed
+/// with [`CLI_TOMBSTONE_PREFIX`] as its tombstone prefix.
+struct TableMetadataDeleter {
+    table_metadata_manager: TableMetadataManager,
+}
+
+impl TableMetadataDeleter {
+    fn new(kv_backend: KvBackendRef) -> Self {
+        let table_metadata_manager = TableMetadataManager::new_with_custom_tombstone_prefix(
+            kv_backend,
+            CLI_TOMBSTONE_PREFIX,
+        );
+        Self {
+            table_metadata_manager,
+        }
+    }
+
+    /// Deletes the metadata of `table_id`.
+    ///
+    /// Returns a `TableNotFound` error when either the table info or the
+    /// table route is missing.
+    async fn delete(&self, table_id: TableId) -> Result<(), BoxedError> {
+        let manager = &self.table_metadata_manager;
+        let full_info = manager
+            .get_full_table_info(table_id)
+            .await
+            .map_err(BoxedError::new)?;
+        // Both halves must exist; a missing one yields the same error.
+        let (Some(table_info), Some(table_route)) = full_info else {
+            return Err(BoxedError::new(TableNotFoundSnafu { table_id }.build()));
+        };
+        let physical_table_id = manager
+            .table_route_manager()
+            .get_physical_table_id(table_id)
+            .await
+            .map_err(BoxedError::new)?;
+
+        let table_name = table_info.table_name();
+        let region_wal_options = get_region_wal_options(manager, &table_route, physical_table_id)
+            .await
+            .map_err(BoxedError::new)?;
+
+        manager
+            .delete_table_metadata(table_id, &table_name, &table_route, &region_wal_options)
+            .await
+            .map_err(BoxedError::new)?;
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+    use std::sync::Arc;
+
+    use common_error::ext::ErrorExt;
+    use common_error::status_code::StatusCode;
+    use common_meta::key::table_route::TableRouteValue;
+    use common_meta::key::TableMetadataManager;
+    use common_meta::kv_backend::chroot::ChrootKvBackend;
+    use common_meta::kv_backend::memory::MemoryKvBackend;
+    use common_meta::kv_backend::{KvBackend, KvBackendRef};
+    use common_meta::rpc::store::RangeRequest;
+
+    use crate::metadata::control::del::table::TableMetadataDeleter;
+    use crate::metadata::control::del::CLI_TOMBSTONE_PREFIX;
+    use crate::metadata::control::test_utils::prepare_physical_table_metadata;
+
+    // Deleting from an empty backend must surface `StatusCode::TableNotFound`.
+    #[tokio::test]
+    async fn test_delete_table_not_found() {
+        let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
+
+        let table_metadata_deleter = TableMetadataDeleter::new(kv_backend);
+        let table_id = 1;
+        let err = table_metadata_deleter.delete(table_id).await.unwrap_err();
+        assert_eq!(err.status_code(), StatusCode::TableNotFound);
+    }
+
+    // Creates metadata for one physical table, deletes it, and checks that
+    // as many keys exist under the tombstone prefix as were created.
+    #[tokio::test]
+    async fn test_delete_table_metadata() {
+        let kv_backend = Arc::new(MemoryKvBackend::new());
+        let table_metadata_manager = TableMetadataManager::new(kv_backend.clone());
+        let table_id = 1024;
+        let (table_info, table_route) = prepare_physical_table_metadata("my_table", table_id).await;
+        table_metadata_manager
+            .create_table_metadata(
+                table_info,
+                TableRouteValue::Physical(table_route),
+                HashMap::new(),
+            )
+            .await
+            .unwrap();
+
+        // Number of keys written for the table before the delete.
+        let total_keys = kv_backend.len();
+        assert!(total_keys > 0);
+
+        let table_metadata_deleter = TableMetadataDeleter::new(kv_backend.clone());
+        table_metadata_deleter.delete(table_id).await.unwrap();
+
+        // Check that every key written for the table now exists under the tombstone prefix.
+        // NOTE(review): the `[0]..[0]` range presumably selects all keys of the
+        // chroot view — confirm against `RangeRequest::with_range`.
+        let chroot =
+            ChrootKvBackend::new(CLI_TOMBSTONE_PREFIX.as_bytes().to_vec(), kv_backend.clone());
+        let req = RangeRequest::default().with_range(vec![0], vec![0]);
+        let resp = chroot.range(req).await.unwrap();
+        assert_eq!(resp.kvs.len(), total_keys);
+    }
+}
--- a/src/cli/src/metadata/control/get.rs
+++ b/src/cli/src/metadata/control/get.rs
@@ -0,0 +1,247 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::cmp::min;
+
+use async_trait::async_trait;
+use clap::{Parser, Subcommand};
+use client::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+use common_catalog::format_full_table_name;
+use common_error::ext::BoxedError;
+use common_meta::key::table_info::TableInfoKey;
+use common_meta::key::table_route::TableRouteKey;
+use common_meta::key::TableMetadataManager;
+use common_meta::kv_backend::KvBackendRef;
+use common_meta::range_stream::{PaginationStream, DEFAULT_PAGE_SIZE};
+use common_meta::rpc::store::RangeRequest;
+use futures::TryStreamExt;
+
+use crate::error::InvalidArgumentsSnafu;
+use crate::metadata::common::StoreConfig;
+use crate::metadata::control::utils::{decode_key_value, get_table_id_by_name, json_fromatter};
+use crate::Tool;
+
+/// Get metadata from the metadata store.
+#[derive(Subcommand)]
+pub enum GetCommand {
+    Key(GetKeyCommand),
+    Table(GetTableCommand),
+}
+
+impl GetCommand {
+    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
+        match self {
+            GetCommand::Key(cmd) => cmd.build().await,
+            GetCommand::Table(cmd) => cmd.build().await,
+        }
+    }
+}
+
+/// Get key-value pairs from the metadata store.
+#[derive(Debug, Default, Parser)]
+pub struct GetKeyCommand {
+    /// The key to get from the metadata store.
+    #[clap(default_value = "")]
+    key: String,
+
+    /// Whether to perform a prefix query. If true, returns all key-value pairs where the key starts with the given prefix.
+    #[clap(long, default_value = "false")]
+    prefix: bool,
+
+    /// The maximum number of key-value pairs to return. If 0, returns all key-value pairs.
+    #[clap(long, default_value = "0")]
+    limit: u64,
+
+    #[clap(flatten)]
+    store: StoreConfig,
+}
+
+impl GetKeyCommand {
+    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
+        let kvbackend = self.store.build().await?;
+        Ok(Box::new(GetKeyTool {
+            kvbackend,
+            key: self.key.clone(),
+            prefix: self.prefix,
+            limit: self.limit,
+        }))
+    }
+}
+
+struct GetKeyTool {
+    kvbackend: KvBackendRef,
+    key: String,
+    prefix: bool,
+    limit: u64,
+}
+
+#[async_trait]
+impl Tool for GetKeyTool {
+    async fn do_work(&self) -> Result<(), BoxedError> {
+        let mut req = RangeRequest::default();
+        if self.prefix {
+            req = req.with_prefix(self.key.as_bytes());
+        } else {
+            req = req.with_key(self.key.as_bytes());
+        }
+        let page_size = if self.limit > 0 {
+            min(self.limit as usize, DEFAULT_PAGE_SIZE)
+        } else {
+            DEFAULT_PAGE_SIZE
+        };
+        let pagination_stream =
+            PaginationStream::new(self.kvbackend.clone(), req, page_size, decode_key_value);
+        let mut stream = Box::pin(pagination_stream.into_stream());
+        let mut counter = 0;
+
+        while let Some((key, value)) = stream.try_next().await.map_err(BoxedError::new)? {
+            print!("{}\n{}\n", key, value);
+            counter += 1;
+            if self.limit > 0 && counter >= self.limit {
+                break;
+            }
+        }
+
+        Ok(())
+    }
+}
+
+/// Get table metadata from the metadata store via table id or table name.
+#[derive(Debug, Default, Parser)]
+pub struct GetTableCommand {
+    /// Get table metadata by table id.
+    #[clap(long)]
+    table_id: Option<u32>,
+
+    /// Get table metadata by table name.
+    #[clap(long)]
+    table_name: Option<String>,
+
+    /// The schema name of the table.
+    #[clap(long, default_value = DEFAULT_SCHEMA_NAME)]
+    schema_name: String,
+
+    /// The catalog name of the table.
+    #[clap(long, default_value = DEFAULT_CATALOG_NAME)]
+    catalog_name: String,
+
+    /// Pretty print the output.
+    #[clap(long, default_value = "false")]
+    pretty: bool,
+
+    #[clap(flatten)]
+    store: StoreConfig,
+}
+
+impl GetTableCommand {
+    pub fn validate(&self) -> Result<(), BoxedError> {
+        if matches!(
+            (&self.table_id, &self.table_name),
+            (Some(_), Some(_)) | (None, None)
+        ) {
+            return Err(BoxedError::new(
+                InvalidArgumentsSnafu {
+                    msg: "You must specify either --table-id or --table-name.",
+                }
+                .build(),
+            ));
+        }
+        Ok(())
+    }
+}
+
+struct GetTableTool {
+    kvbackend: KvBackendRef,
+    table_id: Option<u32>,
+    table_name: Option<String>,
+    schema_name: String,
+    catalog_name: String,
+    pretty: bool,
+}
+
+#[async_trait]
+impl Tool for GetTableTool {
+    async fn do_work(&self) -> Result<(), BoxedError> {
+        let table_metadata_manager = TableMetadataManager::new(self.kvbackend.clone());
+        let table_name_manager = table_metadata_manager.table_name_manager();
+        let table_info_manager = table_metadata_manager.table_info_manager();
+        let table_route_manager = table_metadata_manager.table_route_manager();
+
+        let table_id = if let Some(table_name) = &self.table_name {
+            let catalog_name = &self.catalog_name;
+            let schema_name = &self.schema_name;
+
+            let Some(table_id) =
+                get_table_id_by_name(table_name_manager, catalog_name, schema_name, table_name)
+                    .await?
+            else {
+                println!(
+                    "Table({}) not found",
+                    format_full_table_name(catalog_name, schema_name, table_name)
+                );
+                return Ok(());
+            };
+            table_id
+        } else {
+            // Safety: we have validated that table_id or table_name is not None
+            self.table_id.unwrap()
+        };
+
+        let table_info = table_info_manager
+            .get(table_id)
+            .await
+            .map_err(BoxedError::new)?;
+        if let Some(table_info) = table_info {
+            println!(
+                "{}\n{}",
+                TableInfoKey::new(table_id),
+                json_fromatter(self.pretty, &*table_info)
+            );
+        } else {
+            println!("Table info not found");
+        }
+
+        let table_route = table_route_manager
+            .table_route_storage()
+            .get(table_id)
+            .await
+            .map_err(BoxedError::new)?;
+        if let Some(table_route) = table_route {
+            println!(
+                "{}\n{}",
+                TableRouteKey::new(table_id),
+                json_fromatter(self.pretty, &table_route)
+            );
+        } else {
+            println!("Table route not found");
+        }
+
+        Ok(())
+    }
+}
+
+impl GetTableCommand {
+    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
+        self.validate()?;
+        let kvbackend = self.store.build().await?;
+        Ok(Box::new(GetTableTool {
+            kvbackend,
+            table_id: self.table_id,
+            table_name: self.table_name.clone(),
+            schema_name: self.schema_name.clone(),
+            catalog_name: self.catalog_name.clone(),
+            pretty: self.pretty,
+        }))
+    }
+}
--- a/src/cli/src/metadata/control/test_utils.rs
+++ b/src/cli/src/metadata/control/test_utils.rs
@@ -0,0 +1,51 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use common_meta::ddl::test_util::test_create_physical_table_task;
+use common_meta::key::table_route::PhysicalTableRouteValue;
+use common_meta::kv_backend::KvBackendRef;
+use common_meta::peer::Peer;
+use common_meta::rpc::router::{Region, RegionRoute};
+use common_meta::rpc::store::PutRequest;
+use store_api::storage::{RegionId, TableId};
+use table::metadata::RawTableInfo;
+
+/// Puts a key-value pair into the kv backend.
+pub async fn put_key(kv_backend: &KvBackendRef, key: &str, value: &str) {
+    let put_req = PutRequest::new()
+        .with_key(key.as_bytes())
+        .with_value(value.as_bytes());
+    kv_backend.put(put_req).await.unwrap();
+}
+
+/// Prepares the physical table metadata for testing.
+///
+/// Returns the table info and the table route.
+pub async fn prepare_physical_table_metadata(
+    table_name: &str,
+    table_id: TableId,
+) -> (RawTableInfo, PhysicalTableRouteValue) {
+    let mut create_physical_table_task = test_create_physical_table_task(table_name);
+    let table_route = PhysicalTableRouteValue::new(vec![RegionRoute {
+        region: Region {
+            id: RegionId::new(table_id, 1),
+            ..Default::default()
+        },
+        leader_peer: Some(Peer::empty(1)),
+        ..Default::default()
+    }]);
+    create_physical_table_task.set_table_id(table_id);
+
+    (create_physical_table_task.table_info, table_route)
+}
--- a/src/cli/src/metadata/control/utils.rs
+++ b/src/cli/src/metadata/control/utils.rs
@@ -0,0 +1,57 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use common_error::ext::BoxedError;
+use common_meta::error::Result as CommonMetaResult;
+use common_meta::key::table_name::{TableNameKey, TableNameManager};
+use common_meta::rpc::KeyValue;
+use serde::Serialize;
+use store_api::storage::TableId;
+
+/// Decodes a key-value pair into `(key, value)` strings, replacing invalid UTF-8 lossily.
+pub fn decode_key_value(kv: KeyValue) -> CommonMetaResult<(String, String)> {
+    let key = String::from_utf8_lossy(&kv.key).to_string();
+    let value = String::from_utf8_lossy(&kv.value).to_string();
+    Ok((key, value))
+}
+
+/// Formats a value as JSON (pretty-printed when `pretty` is true); panics if serialization fails.
+pub fn json_fromatter<T>(pretty: bool, value: &T) -> String
+where
+    T: Serialize,
+{
+    if pretty {
+        serde_json::to_string_pretty(value).unwrap()
+    } else {
+        serde_json::to_string(value).unwrap()
+    }
+}
+
+/// Gets the table id by table name.
+pub async fn get_table_id_by_name(
+    table_name_manager: &TableNameManager,
+    catalog_name: &str,
+    schema_name: &str,
+    table_name: &str,
+) -> Result<Option<TableId>, BoxedError> {
+    let table_name_key = TableNameKey::new(catalog_name, schema_name, table_name);
+    let Some(table_name_value) = table_name_manager
+        .get(table_name_key)
+        .await
+        .map_err(BoxedError::new)?
+    else {
+        return Ok(None);
+    };
+    Ok(Some(table_name_value.table_id()))
+}
--- a/src/cli/src/metadata/repair.rs
+++ b/src/cli/src/metadata/repair.rs
@@ -0,0 +1,369 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod alter_table;
+mod create_table;
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use async_trait::async_trait;
+use clap::Parser;
+use client::api::v1::CreateTableExpr;
+use client::client_manager::NodeClients;
+use client::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+use common_error::ext::{BoxedError, ErrorExt};
+use common_error::status_code::StatusCode;
+use common_grpc::channel_manager::ChannelConfig;
+use common_meta::error::Error as CommonMetaError;
+use common_meta::key::TableMetadataManager;
+use common_meta::kv_backend::KvBackendRef;
+use common_meta::node_manager::NodeManagerRef;
+use common_meta::peer::Peer;
+use common_meta::rpc::router::{find_leaders, RegionRoute};
+use common_telemetry::{error, info, warn};
+use futures::TryStreamExt;
+use snafu::{ensure, ResultExt};
+use store_api::storage::TableId;
+
+use crate::error::{
+    InvalidArgumentsSnafu, Result, SendRequestToDatanodeSnafu, TableMetadataSnafu, UnexpectedSnafu,
+};
+use crate::metadata::common::StoreConfig;
+use crate::metadata::utils::{FullTableMetadata, IteratorInput, TableMetadataIterator};
+use crate::Tool;
+
+/// Repair metadata of logical tables.
+#[derive(Debug, Default, Parser)]
+pub struct RepairLogicalTablesCommand {
+    /// The names of the tables to repair.
+    #[clap(long, value_delimiter = ',', alias = "table-name")]
+    table_names: Vec<String>,
+
+    /// The ids of the tables to repair.
+    #[clap(long, value_delimiter = ',', alias = "table-id")]
+    table_ids: Vec<TableId>,
+
+    /// The schema of the tables to repair.
+    #[clap(long, default_value = DEFAULT_SCHEMA_NAME)]
+    schema_name: String,
+
+    /// The catalog of the tables to repair.
+    #[clap(long, default_value = DEFAULT_CATALOG_NAME)]
+    catalog_name: String,
+
+    /// Whether to fail fast if any repair operation fails.
+    #[clap(long)]
+    fail_fast: bool,
+
+    #[clap(flatten)]
+    store: StoreConfig,
+
+    /// The timeout for the client to operate the datanode.
+    #[clap(long, default_value_t = 30)]
+    client_timeout_secs: u64,
+
+    /// The timeout for the client to connect to the datanode.
+    #[clap(long, default_value_t = 3)]
+    client_connect_timeout_secs: u64,
+}
+
+impl RepairLogicalTablesCommand {
+    fn validate(&self) -> Result<()> {
+        ensure!(
+            !self.table_names.is_empty() || !self.table_ids.is_empty(),
+            InvalidArgumentsSnafu {
+                msg: "You must specify --table-names or --table-ids.",
+            }
+        );
+        Ok(())
+    }
+}
+
+impl RepairLogicalTablesCommand {
+    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
+        self.validate().map_err(BoxedError::new)?;
+        let kv_backend = self.store.build().await?;
+        let node_client_channel_config = ChannelConfig::new()
+            .timeout(Duration::from_secs(self.client_timeout_secs))
+            .connect_timeout(Duration::from_secs(self.client_connect_timeout_secs));
+        let node_manager = Arc::new(NodeClients::new(node_client_channel_config));
+
+        Ok(Box::new(RepairTool {
+            table_names: self.table_names.clone(),
+            table_ids: self.table_ids.clone(),
+            schema_name: self.schema_name.clone(),
+            catalog_name: self.catalog_name.clone(),
+            fail_fast: self.fail_fast,
+            kv_backend,
+            node_manager,
+        }))
+    }
+}
+
+struct RepairTool {
+    table_names: Vec<String>,
+    table_ids: Vec<TableId>,
+    schema_name: String,
+    catalog_name: String,
+    fail_fast: bool,
+    kv_backend: KvBackendRef,
+    node_manager: NodeManagerRef,
+}
+
+#[async_trait]
+impl Tool for RepairTool {
+    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
+        self.repair_tables().await.map_err(BoxedError::new)
+    }
+}
+
+impl RepairTool {
+    /// Builds the iterator input; table names take precedence over table ids when both are set.
+    ///
+    /// Returns an invalid-arguments error when neither list was provided.
+    fn generate_iterator_input(&self) -> Result<IteratorInput> {
+        if !self.table_names.is_empty() {
+            let catalog = &self.catalog_name;
+            let schema_name = &self.schema_name;
+
+            // Qualify every requested table name with the configured catalog and schema.
+            let table_names = self
+                .table_names
+                .iter()
+                .map(|table_name| (catalog.clone(), schema_name.clone(), table_name.clone()))
+                .collect::<Vec<_>>();
+            return Ok(IteratorInput::new_table_names(table_names));
+        }
+        if !self.table_ids.is_empty() {
+            return Ok(IteratorInput::new_table_ids(self.table_ids.clone()));
+        }
+
+        // Keep the wording in sync with `RepairLogicalTablesCommand::validate`.
+        InvalidArgumentsSnafu {
+            msg: "You must specify --table-names or --table-ids.",
+        }
+        .fail()
+    }
+
+    async fn repair_tables(&self) -> Result<()> {
+        let input = self.generate_iterator_input()?;
+        let mut table_metadata_iterator =
+            Box::pin(TableMetadataIterator::new(self.kv_backend.clone(), input).into_stream());
+        let table_metadata_manager = TableMetadataManager::new(self.kv_backend.clone());
+
+        let mut skipped_table = 0;
+        let mut success_table = 0;
+        while let Some(full_table_metadata) = table_metadata_iterator.try_next().await? {
+            let full_table_name = full_table_metadata.full_table_name();
+            if !full_table_metadata.is_metric_engine() {
+                warn!(
+                    "Skipping repair for non-metric engine table: {}",
+                    full_table_name
+                );
+                skipped_table += 1;
+                continue;
+            }
+
+            if full_table_metadata.is_physical_table() {
+                warn!("Skipping repair for physical table: {}", full_table_name);
+                skipped_table += 1;
+                continue;
+            }
+
+            let (physical_table_id, physical_table_route) = table_metadata_manager
+                .table_route_manager()
+                .get_physical_table_route(full_table_metadata.table_id)
+                .await
+                .context(TableMetadataSnafu)?;
+
+            if let Err(err) = self
+                .repair_table(
+                    &full_table_metadata,
+                    physical_table_id,
+                    &physical_table_route.region_routes,
+                )
+                .await
+            {
+                error!(
+                    err;
+                    "Failed to repair table: {}, skipped table: {}",
+                    full_table_name,
+                    skipped_table,
+                );
+
+                if self.fail_fast {
+                    return Err(err);
+                }
+            } else {
+                success_table += 1;
+            }
+        }
+
+        info!(
+            "Repair logical tables result: {} tables repaired, {} tables skipped",
+            success_table, skipped_table
+        );
+
+        Ok(())
+    }
+
+    async fn alter_table_on_datanodes(
+        &self,
+        full_table_metadata: &FullTableMetadata,
+        physical_region_routes: &[RegionRoute],
+    ) -> Result<Vec<(Peer, CommonMetaError)>> {
+        let logical_table_id = full_table_metadata.table_id;
+        let alter_table_expr = alter_table::generate_alter_table_expr_for_all_columns(
+            &full_table_metadata.table_info,
+        )?;
+        let node_manager = self.node_manager.clone();
+
+        let mut failed_peers = Vec::new();
+        info!(
+            "Sending alter table requests to all datanodes for table: {}, number of regions:{}.",
+            full_table_metadata.full_table_name(),
+            physical_region_routes.len()
+        );
+        let leaders = find_leaders(physical_region_routes);
+        for peer in &leaders {
+            let alter_table_request = alter_table::make_alter_region_request_for_peer(
+                logical_table_id,
+                &alter_table_expr,
+                full_table_metadata.table_info.ident.version,
+                peer,
+                physical_region_routes,
+            )?;
+            let datanode = node_manager.datanode(peer).await;
+            if let Err(err) = datanode.handle(alter_table_request).await {
+                failed_peers.push((peer.clone(), err));
+            }
+        }
+
+        Ok(failed_peers)
+    }
+
+    async fn create_table_on_datanode(
+        &self,
+        create_table_expr: &CreateTableExpr,
+        logical_table_id: TableId,
+        physical_table_id: TableId,
+        peer: &Peer,
+        physical_region_routes: &[RegionRoute],
+    ) -> Result<()> {
+        let node_manager = self.node_manager.clone();
+        let datanode = node_manager.datanode(peer).await;
+        let create_table_request = create_table::make_create_region_request_for_peer(
+            logical_table_id,
+            physical_table_id,
+            create_table_expr,
+            peer,
+            physical_region_routes,
+        )?;
+
+        datanode
+            .handle(create_table_request)
+            .await
+            .with_context(|_| SendRequestToDatanodeSnafu { peer: peer.clone() })?;
+
+        Ok(())
+    }
+
+    async fn repair_table(
+        &self,
+        full_table_metadata: &FullTableMetadata,
+        physical_table_id: TableId,
+        physical_region_routes: &[RegionRoute],
+    ) -> Result<()> {
+        let full_table_name = full_table_metadata.full_table_name();
+        // First we send alter table requests with all columns to all leader datanodes.
+        let failed_peers = self
+            .alter_table_on_datanodes(full_table_metadata, physical_region_routes)
+            .await?;
+
+        if failed_peers.is_empty() {
+            info!(
+                "All alter table requests sent successfully for table: {}",
+                full_table_name
+            );
+            return Ok(());
+        }
+        warn!(
+            "Sending alter table requests to datanodes for table: {} failed for the datanodes: {:?}",
+            full_table_name,
+            failed_peers.iter().map(|(peer, _)| peer.id).collect::<Vec<_>>()
+        );
+
+        let create_table_expr =
+            create_table::generate_create_table_expr(&full_table_metadata.table_info)?;
+
+        let mut errors = Vec::new();
+        for (peer, err) in failed_peers {
+            if err.status_code() != StatusCode::RegionNotFound {
+                error!(
+                    err;
+                    "Sending alter table requests to datanode: {} for table: {} failed",
+                    peer.id,
+                    full_table_name,
+                );
+                continue; // NOTE(review): not added to `errors`, so this can still return Ok
+            }
+            info!(
+                "Region not found for table: {}, datanode: {}, trying to create the logical table on that datanode",
+                full_table_name,
+                peer.id
+            );
+
+            // The alter request failed with `RegionNotFound` on this datanode, so we attempt to
+            // create the table there as a fallback to keep the table consistent across the cluster.
+            if let Err(err) = self
+                .create_table_on_datanode(
+                    &create_table_expr,
+                    full_table_metadata.table_id,
+                    physical_table_id,
+                    &peer,
+                    physical_region_routes,
+                )
+                .await
+            {
+                error!(
+                    err;
+                    "Failed to create table on datanode: {} for table: {}",
+                    peer.id, full_table_name
+                );
+                errors.push(err);
+                if self.fail_fast {
+                    break;
+                }
+            } else {
+                info!(
+                    "Created table on datanode: {} for table: {}",
+                    peer.id, full_table_name
+                );
+            }
+        }
+
+        if !errors.is_empty() {
+            return UnexpectedSnafu {
+                msg: format!(
+                    "Failed to create table on datanodes for table: {}",
+                    full_table_name,
+                ),
+            }
+            .fail();
+        }
+
+        Ok(())
+    }
+}
--- a/src/cli/src/metadata/repair/alter_table.rs
+++ b/src/cli/src/metadata/repair/alter_table.rs
@@ -0,0 +1,85 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use client::api::v1::alter_table_expr::Kind;
+use client::api::v1::region::{region_request, AlterRequests, RegionRequest, RegionRequestHeader};
+use client::api::v1::{AddColumn, AddColumns, AlterTableExpr};
+use common_meta::ddl::alter_logical_tables::make_alter_region_request;
+use common_meta::peer::Peer;
+use common_meta::rpc::router::{find_leader_regions, RegionRoute};
+use operator::expr_helper::column_schemas_to_defs;
+use snafu::ResultExt;
+use store_api::storage::{RegionId, TableId};
+use table::metadata::RawTableInfo;
+
+use crate::error::{CovertColumnSchemasToDefsSnafu, Result};
+
+/// Generates alter table expression for all columns.
+pub fn generate_alter_table_expr_for_all_columns(
+    table_info: &RawTableInfo,
+) -> Result<AlterTableExpr> {
+    let schema = &table_info.meta.schema;
+
+    let mut alter_table_expr = AlterTableExpr {
+        catalog_name: table_info.catalog_name.to_string(),
+        schema_name: table_info.schema_name.to_string(),
+        table_name: table_info.name.to_string(),
+        ..Default::default()
+    };
+
+    let primary_keys = table_info
+        .meta
+        .primary_key_indices
+        .iter()
+        .map(|i| schema.column_schemas[*i].name.clone())
+        .collect::<Vec<_>>();
+
+    let add_columns = column_schemas_to_defs(schema.column_schemas.clone(), &primary_keys)
+        .context(CovertColumnSchemasToDefsSnafu)?;
+
+    alter_table_expr.kind = Some(Kind::AddColumns(AddColumns {
+        add_columns: add_columns
+            .into_iter()
+            .map(|col| AddColumn {
+                column_def: Some(col),
+                location: None,
+                add_if_not_exists: true,
+            })
+            .collect(),
+    }));
+
+    Ok(alter_table_expr)
+}
+
+/// Makes an alter region request for a peer.
+pub fn make_alter_region_request_for_peer(
+    logical_table_id: TableId,
+    alter_table_expr: &AlterTableExpr,
+    schema_version: u64,
+    peer: &Peer,
+    region_routes: &[RegionRoute],
+) -> Result<RegionRequest> {
+    let regions_on_this_peer = find_leader_regions(region_routes, peer);
+    let mut requests = Vec::with_capacity(regions_on_this_peer.len());
+    for region_number in &regions_on_this_peer {
+        let region_id = RegionId::new(logical_table_id, *region_number);
+        let request = make_alter_region_request(region_id, alter_table_expr, schema_version);
+        requests.push(request);
+    }
+
+    Ok(RegionRequest {
+        header: Some(RegionRequestHeader::default()),
+        body: Some(region_request::Body::Alters(AlterRequests { requests })),
+    })
+}
--- a/src/cli/src/metadata/repair/create_table.rs
+++ b/src/cli/src/metadata/repair/create_table.rs
@@ -0,0 +1,89 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use client::api::v1::region::{region_request, CreateRequests, RegionRequest, RegionRequestHeader};
+use client::api::v1::CreateTableExpr;
+use common_meta::ddl::create_logical_tables::create_region_request_builder;
+use common_meta::ddl::utils::region_storage_path;
+use common_meta::peer::Peer;
+use common_meta::rpc::router::{find_leader_regions, RegionRoute};
+use operator::expr_helper::column_schemas_to_defs;
+use snafu::ResultExt;
+use store_api::storage::{RegionId, TableId};
+use table::metadata::RawTableInfo;
+
+use crate::error::{CovertColumnSchemasToDefsSnafu, Result};
+
+/// Generates a `CreateTableExpr` from a `RawTableInfo`; panics if the schema has no time index.
+pub fn generate_create_table_expr(table_info: &RawTableInfo) -> Result<CreateTableExpr> {
+    let schema = &table_info.meta.schema;
+    let primary_keys = table_info
+        .meta
+        .primary_key_indices
+        .iter()
+        .map(|i| schema.column_schemas[*i].name.clone())
+        .collect::<Vec<_>>();
+
+    let timestamp_index = schema.timestamp_index.as_ref().unwrap();
+    let time_index = schema.column_schemas[*timestamp_index].name.clone();
+    let column_defs = column_schemas_to_defs(schema.column_schemas.clone(), &primary_keys)
+        .context(CovertColumnSchemasToDefsSnafu)?;
+    let table_options = HashMap::from(&table_info.meta.options);
+
+    Ok(CreateTableExpr {
+        catalog_name: table_info.catalog_name.to_string(),
+        schema_name: table_info.schema_name.to_string(),
+        table_name: table_info.name.to_string(),
+        desc: String::default(),
+        column_defs,
+        time_index,
+        primary_keys,
+        create_if_not_exists: true,
+        table_options,
+        table_id: None,
+        engine: table_info.meta.engine.to_string(),
+    })
+}
+
+/// Makes a create region request for a peer; panics if `create_region_request_builder` fails.
+pub fn make_create_region_request_for_peer(
+    logical_table_id: TableId,
+    physical_table_id: TableId,
+    create_table_expr: &CreateTableExpr,
+    peer: &Peer,
+    region_routes: &[RegionRoute],
+) -> Result<RegionRequest> {
+    let regions_on_this_peer = find_leader_regions(region_routes, peer);
+    let mut requests = Vec::with_capacity(regions_on_this_peer.len());
+    let request_builder =
+        create_region_request_builder(create_table_expr, physical_table_id).unwrap();
+
+    let catalog = &create_table_expr.catalog_name;
+    let schema = &create_table_expr.schema_name;
+    let storage_path = region_storage_path(catalog, schema);
+
+    for region_number in &regions_on_this_peer {
+        let region_id = RegionId::new(logical_table_id, *region_number);
+        let region_request =
+            request_builder.build_one(region_id, storage_path.clone(), &HashMap::new());
+        requests.push(region_request);
+    }
+
+    Ok(RegionRequest {
+        header: Some(RegionRequestHeader::default()),
+        body: Some(region_request::Body::Creates(CreateRequests { requests })),
+    })
+}
--- a/src/cli/src/metadata/snapshot.rs
+++ b/src/cli/src/metadata/snapshot.rs
@@ -12,96 +12,38 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::sync::Arc;
+use std::path::Path;

 use async_trait::async_trait;
-use clap::Parser;
+use clap::{Parser, Subcommand};
 use common_base::secrets::{ExposeSecret, SecretString};
 use common_error::ext::BoxedError;
-use common_meta::kv_backend::chroot::ChrootKvBackend;
-use common_meta::kv_backend::etcd::EtcdStore;
-use common_meta::kv_backend::KvBackendRef;
 use common_meta::snapshot::MetadataSnapshotManager;
-use meta_srv::bootstrap::create_etcd_client;
-use meta_srv::metasrv::BackendImpl;
 use object_store::services::{Fs, S3};
 use object_store::ObjectStore;
-use snafu::ResultExt;
+use snafu::{OptionExt, ResultExt};

-use crate::error::{KvBackendNotSetSnafu, OpenDalSnafu, S3ConfigNotSetSnafu};
+use crate::error::{InvalidFilePathSnafu, OpenDalSnafu, S3ConfigNotSetSnafu};
+use crate::metadata::common::StoreConfig;
 use crate::Tool;
-#[derive(Debug, Default, Parser)]
-struct MetaConnection {
-    /// The endpoint of store. one of etcd, pg or mysql.
-    #[clap(long, alias = "store-addr", value_delimiter = ',', num_args = 1..)]
-    store_addrs: Vec<String>,
-    /// The database backend.
-    #[clap(long, value_enum)]
-    backend: Option<BackendImpl>,
-    #[clap(long, default_value = "")]
-    store_key_prefix: String,
-    #[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
-    #[clap(long,default_value = common_meta::kv_backend::DEFAULT_META_TABLE_NAME)]
-    meta_table_name: String,
-    #[clap(long, default_value = "128")]
-    max_txn_ops: usize,
+
+/// Subcommand for metadata snapshot operations, including saving snapshots, restoring from snapshots, and viewing snapshot information.
+#[derive(Subcommand)]
+pub enum SnapshotCommand {
+    /// Save a snapshot of the current metadata state to a specified location.
+    Save(SaveCommand),
+    /// Restore metadata from a snapshot.
+    Restore(RestoreCommand),
+    /// Explore metadata from a snapshot.
+    Info(InfoCommand),
 }

-impl MetaConnection {
-    pub async fn build(&self) -> Result<KvBackendRef, BoxedError> {
-        let max_txn_ops = self.max_txn_ops;
-        let store_addrs = &self.store_addrs;
-        if store_addrs.is_empty() {
-            KvBackendNotSetSnafu { backend: "all" }
-                .fail()
-                .map_err(BoxedError::new)
-        } else {
-            let kvbackend = match self.backend {
-                Some(BackendImpl::EtcdStore) => {
-                    let etcd_client = create_etcd_client(store_addrs)
-                        .await
-                        .map_err(BoxedError::new)?;
-                    Ok(EtcdStore::with_etcd_client(etcd_client, max_txn_ops))
-                }
-                #[cfg(feature = "pg_kvbackend")]
-                Some(BackendImpl::PostgresStore) => {
-                    let table_name = &self.meta_table_name;
-                    let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs)
-                        .await
-                        .map_err(BoxedError::new)?;
-                    Ok(common_meta::kv_backend::rds::PgStore::with_pg_pool(
-                        pool,
-                        table_name,
-                        max_txn_ops,
-                    )
-                    .await
-                    .map_err(BoxedError::new)?)
-                }
-                #[cfg(feature = "mysql_kvbackend")]
-                Some(BackendImpl::MysqlStore) => {
-                    let table_name = &self.meta_table_name;
-                    let pool = meta_srv::bootstrap::create_mysql_pool(store_addrs)
-                        .await
-                        .map_err(BoxedError::new)?;
-                    Ok(common_meta::kv_backend::rds::MySqlStore::with_mysql_pool(
-                        pool,
-                        table_name,
-                        max_txn_ops,
-                    )
-                    .await
-                    .map_err(BoxedError::new)?)
-                }
-                _ => KvBackendNotSetSnafu { backend: "all" }
-                    .fail()
-                    .map_err(BoxedError::new),
-            };
-            if self.store_key_prefix.is_empty() {
-                kvbackend
-            } else {
-                let chroot_kvbackend =
-                    ChrootKvBackend::new(self.store_key_prefix.as_bytes().to_vec(), kvbackend?);
-                Ok(Arc::new(chroot_kvbackend))
-            }
+impl SnapshotCommand {
+    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
+        match self {
+            SnapshotCommand::Save(cmd) => cmd.build().await,
+            SnapshotCommand::Restore(cmd) => cmd.build().await,
+            SnapshotCommand::Info(cmd) => cmd.build().await,
        }
    }
 }
@@ -170,10 +112,10 @@ impl S3Config {
 /// It will dump the metadata snapshot to local file or s3 bucket.
 /// The snapshot file will be in binary format.
 #[derive(Debug, Default, Parser)]
-pub struct MetaSnapshotCommand {
-    /// The connection to the metadata store.
+pub struct SaveCommand {
+    /// The store configuration.
    #[clap(flatten)]
-    connection: MetaConnection,
+    store: StoreConfig,
    /// The s3 config.
    #[clap(flatten)]
    s3_config: S3Config,
@@ -196,9 +138,9 @@ fn create_local_file_object_store(root: &str) -> Result<ObjectStore, BoxedError>
    Ok(object_store)
 }

-impl MetaSnapshotCommand {
+impl SaveCommand {
    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
-        let kvbackend = self.connection.build().await?;
+        let kvbackend = self.store.build().await?;
        let output_dir = &self.output_dir;
        let object_store = self.s3_config.build(output_dir).map_err(BoxedError::new)?;
        if let Some(store) = object_store {
@@ -218,7 +160,7 @@ impl MetaSnapshotCommand {
    }
 }

-pub struct MetaSnapshotTool {
+struct MetaSnapshotTool {
    inner: MetadataSnapshotManager,
    target_file: String,
 }
@@ -234,14 +176,16 @@ impl Tool for MetaSnapshotTool {
    }
 }

-/// Restore metadata snapshot tool.
-/// This tool is used to restore metadata snapshot from etcd, pg or mysql.
-/// It will restore the metadata snapshot from local file or s3 bucket.
+/// Restore metadata from a snapshot file.
+///
+/// This command restores the metadata state from a previously saved snapshot.
+/// The snapshot can be loaded from either a local file system or an S3 bucket,
+/// depending on the provided configuration.
 #[derive(Debug, Default, Parser)]
-pub struct MetaRestoreCommand {
-    /// The connection to the metadata store.
+pub struct RestoreCommand {
+    /// The store configuration.
    #[clap(flatten)]
-    connection: MetaConnection,
+    store: StoreConfig,
    /// The s3 config.
    #[clap(flatten)]
    s3_config: S3Config,
@@ -255,9 +199,9 @@ pub struct MetaRestoreCommand {
    force: bool,
 }

-impl MetaRestoreCommand {
+impl RestoreCommand {
    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
-        let kvbackend = self.connection.build().await?;
+        let kvbackend = self.store.build().await?;
        let input_dir = &self.input_dir;
        let object_store = self.s3_config.build(input_dir).map_err(BoxedError::new)?;
        if let Some(store) = object_store {
@@ -279,7 +223,7 @@ impl MetaRestoreCommand {
    }
 }

-pub struct MetaRestoreTool {
+struct MetaRestoreTool {
    inner: MetadataSnapshotManager,
    source_file: String,
    force: bool,
@@ -327,3 +271,93 @@ impl Tool for MetaRestoreTool {
        }
    }
 }
+
+/// Explore metadata from a snapshot file.
+///
+/// This command allows filtering the metadata by a specific key and limiting the number of results.
+/// It prints the filtered metadata to the console.
+#[derive(Debug, Default, Parser)]
+pub struct InfoCommand {
+    /// The s3 config.
+    #[clap(flatten)]
+    s3_config: S3Config,
+    /// The name of the target snapshot file. The file extension is added automatically.
+    #[clap(long, default_value = "metadata_snapshot")]
+    file_name: String,
+    /// The query string to filter the metadata.
+    #[clap(long, default_value = "*")]
+    inspect_key: String,
+    /// The limit of the metadata to query.
+    #[clap(long)]
+    limit: Option<usize>,
+}
+
+/// Tool backing the `info` subcommand: lists what [`MetadataSnapshotManager::info`]
+/// returns for `source_file` in the `inner` object store.
+struct MetaInfoTool {
+    inner: ObjectStore,
+    source_file: String,
+    inspect_key: String,
+    limit: Option<usize>,
+}
+
+#[async_trait]
+impl Tool for MetaInfoTool {
+    /// Runs the query with `inspect_key` and `limit`, then prints every returned
+    /// item on its own stdout line (hence the `print_stdout` allowance).
+    #[allow(clippy::print_stdout)]
+    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
+        let (store, file, key) = (&self.inner, &self.source_file, &self.inspect_key);
+        let items = MetadataSnapshotManager::info(store, file, key, self.limit)
+            .await
+            .map_err(BoxedError::new)?;
+        for item in items {
+            println!("{}", item);
+        }
+        Ok(())
+    }
+}
+
+impl InfoCommand {
+    /// Splits a local `file_path` into `(root, file_name)`: the parent directory (or `"."`
+    /// when the parent is empty) to use as the object store root, and the final component.
+    ///
+    /// Returns an `InvalidFilePath` error if either part is missing or is not valid UTF-8.
+    fn decide_object_store_root_for_local_store(
+        file_path: &str,
+    ) -> Result<(&str, &str), BoxedError> {
+        let path = Path::new(file_path);
+        let parent = path.parent().and_then(|dir| dir.to_str());
+        let parent = parent
+            .context(InvalidFilePathSnafu { msg: file_path })
+            .map_err(BoxedError::new)?;
+        let name = path.file_name().and_then(|name| name.to_str());
+        let name = name
+            .context(InvalidFilePathSnafu { msg: file_path })
+            .map_err(BoxedError::new)?;
+        let root = match parent {
+            "" => ".",
+            dir => dir,
+        };
+        Ok((root, name))
+    }
+
+    /// Builds the inspection tool on the object store produced by `s3_config` when there
+    /// is one; otherwise on a local store rooted at the parent directory of `file_name`.
+    pub async fn build(&self) -> Result<Box<dyn Tool>, BoxedError> {
+        let (inner, source_file) = match self.s3_config.build("").map_err(BoxedError::new)? {
+            Some(store) => (store, self.file_name.clone()),
+            None => {
+                let (root, file_name) =
+                    Self::decide_object_store_root_for_local_store(&self.file_name)?;
+                (create_local_file_object_store(root)?, file_name.to_string())
+            }
+        };
+        Ok(Box::new(MetaInfoTool {
+            inner,
+            source_file,
+            inspect_key: self.inspect_key.clone(),
+            limit: self.limit,
+        }))
+    }
+}
--- a/src/cli/src/metadata/utils.rs
+++ b/src/cli/src/metadata/utils.rs
@@ -0,0 +1,178 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::VecDeque;
+
+use async_stream::try_stream;
+use common_catalog::consts::METRIC_ENGINE;
+use common_catalog::format_full_table_name;
+use common_meta::key::table_name::TableNameKey;
+use common_meta::key::table_route::TableRouteValue;
+use common_meta::key::TableMetadataManager;
+use common_meta::kv_backend::KvBackendRef;
+use futures::Stream;
+use snafu::{OptionExt, ResultExt};
+use store_api::storage::TableId;
+use table::metadata::RawTableInfo;
+
+use crate::error::{Result, TableMetadataSnafu, UnexpectedSnafu};
+
+/// Tables for a [`TableMetadataIterator`] to visit: ids or `(catalog, schema, table)` names.
+pub enum IteratorInput {
+    TableIds(VecDeque<TableId>),
+    TableNames(VecDeque<(String, String, String)>),
+}
+
+impl IteratorInput {
+    /// Builds an input that holds the given table ids in their original order.
+    pub fn new_table_ids(table_ids: Vec<TableId>) -> Self {
+        Self::TableIds(VecDeque::from(table_ids))
+    }
+
+    /// Builds an input that holds the given table name tuples in their original order.
+    pub fn new_table_names(table_names: Vec<(String, String, String)>) -> Self {
+        Self::TableNames(VecDeque::from(table_names))
+    }
+}
+
+/// An iterator for retrieving table metadata from the metadata store.
+///
+/// Each call to `next` takes one entry of the [`IteratorInput`] (a [`TableId`] or a fully
+/// qualified table name) and resolves it into a [`FullTableMetadata`].
+pub struct TableMetadataIterator {
+    input: IteratorInput,
+    table_metadata_manager: TableMetadataManager,
+}
+
+/// The full metadata of one table: its id, [`RawTableInfo`] and [`TableRouteValue`].
+pub struct FullTableMetadata {
+    pub table_id: TableId,
+    pub table_info: RawTableInfo,
+    pub table_route: TableRouteValue,
+}
+
+impl FullTableMetadata {
+    /// Returns true if the table route is physical, as reported by
+    /// [`TableRouteValue::is_physical`].
+    pub fn is_physical_table(&self) -> bool {
+        self.table_route.is_physical()
+    }
+
+    /// Returns true if the engine name recorded in the table info equals
+    /// [`METRIC_ENGINE`].
+    pub fn is_metric_engine(&self) -> bool {
+        self.table_info.meta.engine == METRIC_ENGINE
+    }
+
+    /// Returns the name produced by [`format_full_table_name`] from the catalog,
+    /// schema and table names of the table info.
+    pub fn full_table_name(&self) -> String {
+        let info = &self.table_info;
+        format_full_table_name(&info.catalog_name, &info.schema_name, &info.name)
+    }
+}
+
+impl TableMetadataIterator {
+    /// Creates an iterator over `input`.
+    ///
+    /// Metadata is read through a [`TableMetadataManager`] built on `kvbackend`.
+    pub fn new(kvbackend: KvBackendRef, input: IteratorInput) -> Self {
+        Self {
+            input,
+            table_metadata_manager: TableMetadataManager::new(kvbackend),
+        }
+    }
+
+    /// Returns the metadata of the next pending table.
+    ///
+    /// The front entry of the input queue is consumed on every call:
+    /// - TableIds: the [`TableId`] is looked up directly.
+    /// - TableNames: the (catalog, schema, table) name is resolved to its id first.
+    ///
+    /// Returns `None` when there are no more tables to process. An entry whose
+    /// lookup fails is still consumed, and the error is returned.
+    pub async fn next(&mut self) -> Result<Option<FullTableMetadata>> {
+        let table_id = match &mut self.input {
+            IteratorInput::TableIds(ids) => match ids.pop_front() {
+                Some(id) => id,
+                None => return Ok(None),
+            },
+            IteratorInput::TableNames(names) => match names.pop_front() {
+                Some(name) => self.get_table_id_by_name(name).await?,
+                None => return Ok(None),
+            },
+        };
+
+        self.get_table_metadata(table_id).await.map(Some)
+    }
+
+    /// Converts the iterator into a stream of table metadata.
+    ///
+    /// The stream repeatedly calls [`Self::next`]; an error from it is propagated
+    /// with `?` inside `try_stream!`.
+    pub fn into_stream(mut self) -> impl Stream<Item = Result<FullTableMetadata>> {
+        try_stream!({
+            while let Some(metadata) = self.next().await? {
+                yield metadata;
+            }
+        })
+    }
+
+    /// Resolves a `(catalog, schema, table)` name to its [`TableId`].
+    ///
+    /// Fails with an `Unexpected` error if the table name key has no value.
+    async fn get_table_id_by_name(
+        &mut self,
+        (catalog_name, schema_name, table_name): (String, String, String),
+    ) -> Result<TableId> {
+        let key = TableNameKey::new(&catalog_name, &schema_name, &table_name);
+        let name_manager = self.table_metadata_manager.table_name_manager();
+        let value = name_manager.get(key).await.context(TableMetadataSnafu)?;
+        let value = value.with_context(|| UnexpectedSnafu {
+            msg: format!(
+                "Table not found: {}",
+                format_full_table_name(&catalog_name, &schema_name, &table_name)
+            ),
+        })?;
+
+        Ok(value.table_id())
+    }
+
+    /// Loads the table info and table route stored for `table_id`.
+    ///
+    /// Fails with an `Unexpected` error if either the info or the route is absent;
+    /// the info is checked first.
+    async fn get_table_metadata(&mut self, table_id: TableId) -> Result<FullTableMetadata> {
+        let (info, route) = self
+            .table_metadata_manager
+            .get_full_table_info(table_id)
+            .await
+            .context(TableMetadataSnafu)?;
+
+        Ok(FullTableMetadata {
+            table_id,
+            table_info: info
+                .with_context(|| UnexpectedSnafu {
+                    msg: format!("Table info not found for table id: {table_id}"),
+                })?
+                .into_inner()
+                .table_info,
+            table_route: route
+                .with_context(|| UnexpectedSnafu {
+                    msg: format!("Table route not found for table id: {table_id}"),
+                })?
+                .into_inner(),
+        })
+    }
+}
--- a/src/client/src/client.rs
+++ b/src/client/src/client.rs
@@ -162,14 +162,23 @@ impl Client {
            .as_bytes() as usize
    }

-    pub fn make_flight_client(&self) -> Result<FlightClient> {
+    pub fn make_flight_client(
+        &self,
+        send_compression: bool,
+        accept_compression: bool,
+    ) -> Result<FlightClient> {
        let (addr, channel) = self.find_channel()?;

-        let client = FlightServiceClient::new(channel)
+        let mut client = FlightServiceClient::new(channel)
            .max_decoding_message_size(self.max_grpc_recv_message_size())
-            .max_encoding_message_size(self.max_grpc_send_message_size())
-            .accept_compressed(CompressionEncoding::Zstd)
-            .send_compressed(CompressionEncoding::Zstd);
+            .max_encoding_message_size(self.max_grpc_send_message_size());
+        // todo(hl): support compression methods.
+        if send_compression {
+            client = client.send_compressed(CompressionEncoding::Zstd);
+        }
+        if accept_compression {
+            client = client.accept_compressed(CompressionEncoding::Zstd);
+        }

        Ok(FlightClient { addr, client })
    }
@@ -178,9 +187,7 @@ impl Client {
        let (addr, channel) = self.find_channel()?;
        let client = PbRegionClient::new(channel)
            .max_decoding_message_size(self.max_grpc_recv_message_size())
-            .max_encoding_message_size(self.max_grpc_send_message_size())
-            .accept_compressed(CompressionEncoding::Zstd)
-            .send_compressed(CompressionEncoding::Zstd);
+            .max_encoding_message_size(self.max_grpc_send_message_size());
        Ok((addr, client))
    }

--- a/src/client/src/client_manager.rs
+++ b/src/client/src/client_manager.rs
@@ -49,7 +49,16 @@ impl NodeManager for NodeClients {
    async fn datanode(&self, datanode: &Peer) -> DatanodeRef {
        let client = self.get_client(datanode).await;

-        Arc::new(RegionRequester::new(client))
+        let ChannelConfig {
+            send_compression,
+            accept_compression,
+            ..
+        } = self.channel_manager.config();
+        Arc::new(RegionRequester::new(
+            client,
+            *send_compression,
+            *accept_compression,
+        ))
    }

    async fn flownode(&self, flownode: &Peer) -> FlownodeRef {
--- a/src/client/src/database.rs
+++ b/src/client/src/database.rs
@@ -287,7 +287,7 @@ impl Database {
        let mut request = tonic::Request::new(request);
        Self::put_hints(request.metadata_mut(), hints)?;

-        let mut client = self.client.make_flight_client()?;
+        let mut client = self.client.make_flight_client(false, false)?;

        let response = client.mut_inner().do_get(request).await.or_else(|e| {
            let tonic_code = e.code();
@@ -409,7 +409,7 @@ impl Database {
            MetadataValue::from_str(db_to_put).context(InvalidTonicMetadataValueSnafu)?,
        );

-        let mut client = self.client.make_flight_client()?;
+        let mut client = self.client.make_flight_client(false, false)?;
        let response = client.mut_inner().do_put(request).await?;
        let response = response
            .into_inner()
--- a/src/client/src/flow.rs
+++ b/src/client/src/flow.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use api::v1::flow::{FlowRequest, FlowResponse};
+use api::v1::flow::{DirtyWindowRequest, DirtyWindowRequests, FlowRequest, FlowResponse};
 use api::v1::region::InsertRequests;
 use common_error::ext::BoxedError;
 use common_meta::node_manager::Flownode;
@@ -44,6 +44,16 @@ impl Flownode for FlowRequester {
            .map_err(BoxedError::new)
            .context(common_meta::error::ExternalSnafu)
    }
+
+    async fn handle_mark_window_dirty(
+        &self,
+        req: DirtyWindowRequest,
+    ) -> common_meta::error::Result<FlowResponse> {
+        // The explicit path selects the inherent method; this is not a recursive trait call.
+        let response = FlowRequester::handle_mark_window_dirty(self, req).await;
+        let response = response.map_err(BoxedError::new);
+        response.context(common_meta::error::ExternalSnafu)
+    }
 }

 impl FlowRequester {
@@ -91,4 +101,20 @@ impl FlowRequester {
            .into_inner();
        Ok(response)
    }
+
+    /// Sends `req` through the raw flow client as a single-element [`DirtyWindowRequests`];
+    /// a failed call is returned as a `FlowServer` error with the address and status code.
+    async fn handle_mark_window_dirty(&self, req: DirtyWindowRequest) -> Result<FlowResponse> {
+        let (addr, mut client) = self.client.raw_flow_client()?;
+        let batch = DirtyWindowRequests {
+            requests: vec![req],
+        };
+        let reply = client.handle_mark_dirty_time_window(batch).await;
+        let reply = reply.or_else(|status| {
+            let code = status.code();
+            let err: crate::error::Error = status.into();
+            Err(BoxedError::new(err)).context(FlowServerSnafu { addr, code })
+        })?;
+        Ok(reply.into_inner())
+    }
 }
--- a/src/client/src/region.rs
+++ b/src/client/src/region.rs
@@ -46,6 +46,8 @@ use crate::{metrics, Client, Error};
 #[derive(Debug)]
 pub struct RegionRequester {
    client: Client,
+    send_compression: bool,
+    accept_compression: bool,
 }

 #[async_trait]
@@ -89,12 +91,18 @@ impl Datanode for RegionRequester {
 }

 impl RegionRequester {
-    pub fn new(client: Client) -> Self {
-        Self { client }
+    pub fn new(client: Client, send_compression: bool, accept_compression: bool) -> Self {
+        Self {
+            client,
+            send_compression,
+            accept_compression,
+        }
    }

    pub async fn do_get_inner(&self, ticket: Ticket) -> Result<SendableRecordBatchStream> {
-        let mut flight_client = self.client.make_flight_client()?;
+        let mut flight_client = self
+            .client
+            .make_flight_client(self.send_compression, self.accept_compression)?;
        let response = flight_client
            .mut_inner()
            .do_get(ticket)
--- a/src/cmd/Cargo.toml
+++ b/src/cmd/Cargo.toml
@@ -10,7 +10,13 @@ name = "greptime"
 path = "src/bin/greptime.rs"

 [features]
-default = ["servers/pprof", "servers/mem-prof", "meta-srv/pg_kvbackend", "meta-srv/mysql_kvbackend"]
+default = [
+    "servers/pprof",
+    "servers/mem-prof",
+    "meta-srv/pg_kvbackend",
+    "meta-srv/mysql_kvbackend",
+]
+enterprise = ["common-meta/enterprise", "frontend/enterprise", "meta-srv/enterprise"]
 tokio-console = ["common-telemetry/tokio-console"]

 [lints]
@@ -61,6 +67,7 @@ metric-engine.workspace = true
 mito2.workspace = true
 moka.workspace = true
 nu-ansi-term = "0.46"
+object-store.workspace = true
 plugins.workspace = true
 prometheus.workspace = true
 prost.workspace = true
@@ -74,6 +81,7 @@ servers.workspace = true
 session.workspace = true
 similar-asserts.workspace = true
 snafu.workspace = true
+stat.workspace = true
 store-api.workspace = true
 substrait.workspace = true
 table.workspace = true
--- a/src/cmd/src/cli.rs
+++ b/src/cmd/src/cli.rs
@@ -146,6 +146,7 @@ mod tests {
        let output_dir = tempfile::tempdir().unwrap();
        let cli = cli::Command::parse_from([
            "cli",
+            "data",
            "export",
            "--addr",
            "127.0.0.1:4000",
--- a/src/cmd/src/datanode.rs
+++ b/src/cmd/src/datanode.rs
@@ -14,12 +14,13 @@

 pub mod builder;

+use std::path::Path;
 use std::time::Duration;

 use async_trait::async_trait;
 use clap::Parser;
 use common_config::Configurable;
-use common_telemetry::logging::TracingOptions;
+use common_telemetry::logging::{TracingOptions, DEFAULT_LOGGING_DIR};
 use common_telemetry::{info, warn};
 use common_wal::config::DatanodeWalConfig;
 use datanode::datanode::Datanode;
@@ -248,6 +249,14 @@ impl StartCommand {
            raft_engine_config.dir.replace(wal_dir.clone());
        }

+        // If the logging dir is not set, use the default logs dir in the data home.
+        if opts.logging.dir.is_empty() {
+            opts.logging.dir = Path::new(&opts.storage.data_home)
+                .join(DEFAULT_LOGGING_DIR)
+                .to_string_lossy()
+                .to_string();
+        }
+
        if let Some(http_addr) = &self.http_addr {
            opts.http.addr.clone_from(http_addr);
        }
@@ -271,7 +280,7 @@ mod tests {

    use common_config::ENV_VAR_SEP;
    use common_test_util::temp_dir::create_named_temp_file;
-    use datanode::config::{FileConfig, GcsConfig, ObjectStoreConfig, S3Config};
+    use object_store::config::{FileConfig, GcsConfig, ObjectStoreConfig, S3Config};
    use servers::heartbeat_options::HeartbeatOptions;

    use super::*;
--- a/src/cmd/src/datanode/builder.rs
+++ b/src/cmd/src/datanode/builder.rs
@@ -28,7 +28,7 @@ use tracing_appender::non_blocking::WorkerGuard;

 use crate::datanode::{DatanodeOptions, Instance, APP_NAME};
 use crate::error::{MetaClientInitSnafu, MissingConfigSnafu, Result, StartDatanodeSnafu};
-use crate::log_versions;
+use crate::{create_resource_limit_metrics, log_versions};

 /// Builder for Datanode instance.
 pub struct InstanceBuilder {
@@ -68,6 +68,7 @@ impl InstanceBuilder {
        );

        log_versions(version(), short_version(), APP_NAME);
+        create_resource_limit_metrics(APP_NAME);

        plugins::setup_datanode_plugins(plugins, &opts.plugins, dn_opts)
            .await
@@ -92,6 +93,7 @@ impl InstanceBuilder {
            MetaClientType::Datanode { member_id },
            meta_client_options,
            Some(&plugins),
+            None,
        )
        .await
        .context(MetaClientInitSnafu)?;
--- a/src/cmd/src/flownode.rs
+++ b/src/cmd/src/flownode.rs
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::path::Path;
 use std::sync::Arc;
 use std::time::Duration;

@@ -21,7 +22,7 @@ use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManager, MetaKv
 use clap::Parser;
 use client::client_manager::NodeClients;
 use common_base::Plugins;
-use common_config::Configurable;
+use common_config::{Configurable, DEFAULT_DATA_HOME};
 use common_grpc::channel_manager::ChannelConfig;
 use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
 use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHandler;
@@ -30,7 +31,7 @@ use common_meta::heartbeat::handler::HandlerGroupExecutor;
 use common_meta::key::flow::FlowMetadataManager;
 use common_meta::key::TableMetadataManager;
 use common_telemetry::info;
-use common_telemetry::logging::TracingOptions;
+use common_telemetry::logging::{TracingOptions, DEFAULT_LOGGING_DIR};
 use common_version::{short_version, version};
 use flow::{
    get_flow_auth_options, FlownodeBuilder, FlownodeInstance, FlownodeServiceBuilder,
@@ -45,7 +46,7 @@ use crate::error::{
    MissingConfigSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
 };
 use crate::options::{GlobalOptions, GreptimeOptions};
-use crate::{log_versions, App};
+use crate::{create_resource_limit_metrics, log_versions, App};

 pub const APP_NAME: &str = "greptime-flownode";

@@ -54,14 +55,32 @@ type FlownodeOptions = GreptimeOptions<flow::FlownodeOptions>;
 pub struct Instance {
    flownode: FlownodeInstance,

+    // The components of flownode, which make it easier to expand based
+    // on the components.
+    #[cfg(feature = "enterprise")]
+    components: Components,
+
    // Keep the logging guard to prevent the worker from being dropped.
    _guard: Vec<WorkerGuard>,
 }

+#[cfg(feature = "enterprise")]
+pub struct Components {
+    pub catalog_manager: catalog::CatalogManagerRef,
+    pub fe_client: Arc<FrontendClient>,
+    pub kv_backend: common_meta::kv_backend::KvBackendRef,
+}
+
 impl Instance {
-    pub fn new(flownode: FlownodeInstance, guard: Vec<WorkerGuard>) -> Self {
+    pub fn new(
+        flownode: FlownodeInstance,
+        #[cfg(feature = "enterprise")] components: Components,
+        guard: Vec<WorkerGuard>,
+    ) -> Self {
        Self {
            flownode,
+            #[cfg(feature = "enterprise")]
+            components,
            _guard: guard,
        }
    }
@@ -74,6 +93,11 @@ impl Instance {
    pub fn flownode_mut(&mut self) -> &mut FlownodeInstance {
        &mut self.flownode
    }
+
+    #[cfg(feature = "enterprise")]
+    pub fn components(&self) -> &Components {
+        &self.components
+    }
 }

 #[async_trait::async_trait]
@@ -186,6 +210,14 @@ impl StartCommand {
            opts.logging.dir.clone_from(dir);
        }

+        // If the logging dir is not set, use the default logs dir in the data home.
+        if opts.logging.dir.is_empty() {
+            opts.logging.dir = Path::new(DEFAULT_DATA_HOME)
+                .join(DEFAULT_LOGGING_DIR)
+                .to_string_lossy()
+                .to_string();
+        }
+
        if global_options.log_level.is_some() {
            opts.logging.level.clone_from(&global_options.log_level);
        }
@@ -246,7 +278,9 @@ impl StartCommand {
            opts.component.node_id.map(|x| x.to_string()),
            None,
        );
+
        log_versions(version(), short_version(), APP_NAME);
+        create_resource_limit_metrics(APP_NAME);

        info!("Flownode start command: {:#?}", self);
        info!("Flownode options: {:#?}", opts);
@@ -272,6 +306,7 @@ impl StartCommand {
            MetaClientType::Flownode { member_id },
            meta_config,
            None,
+            None,
        )
        .await
        .context(MetaClientInitSnafu)?;
@@ -312,6 +347,7 @@ impl StartCommand {
            cached_meta_backend.clone(),
            layered_cache_registry.clone(),
            None,
+            None,
        );

        let table_metadata_manager =
@@ -337,19 +373,20 @@ impl StartCommand {
        let flow_auth_header = get_flow_auth_options(&opts).context(StartFlownodeSnafu)?;
        let frontend_client =
            FrontendClient::from_meta_client(meta_client.clone(), flow_auth_header);
+        let frontend_client = Arc::new(frontend_client);
        let flownode_builder = FlownodeBuilder::new(
            opts.clone(),
            plugins,
            table_metadata_manager,
            catalog_manager.clone(),
            flow_metadata_manager,
-            Arc::new(frontend_client),
+            frontend_client.clone(),
        )
        .with_heartbeat_task(heartbeat_task);

        let mut flownode = flownode_builder.build().await.context(StartFlownodeSnafu)?;
        let services = FlownodeServiceBuilder::new(&opts)
-            .with_grpc_server(flownode.flownode_server().clone())
+            .with_default_grpc_server(flownode.flownode_server())
            .enable_http_service()
            .build()
            .context(StartFlownodeSnafu)?;
@@ -381,6 +418,16 @@ impl StartCommand {
            .set_frontend_invoker(invoker)
            .await;

-        Ok(Instance::new(flownode, guard))
+        #[cfg(feature = "enterprise")]
+        let components = Components {
+            catalog_manager: catalog_manager.clone(),
+            fe_client: frontend_client,
+            kv_backend: cached_meta_backend,
+        };
+
+        #[cfg(not(feature = "enterprise"))]
+        return Ok(Instance::new(flownode, guard));
+        #[cfg(feature = "enterprise")]
+        Ok(Instance::new(flownode, components, guard))
    }
 }
--- a/src/cmd/src/frontend.rs
+++ b/src/cmd/src/frontend.rs
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::path::Path;
 use std::sync::Arc;
 use std::time::Duration;

@@ -19,17 +20,18 @@ use async_trait::async_trait;
 use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
 use catalog::information_extension::DistributedInformationExtension;
 use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend};
+use catalog::process_manager::ProcessManager;
 use clap::Parser;
 use client::client_manager::NodeClients;
 use common_base::Plugins;
-use common_config::Configurable;
+use common_config::{Configurable, DEFAULT_DATA_HOME};
 use common_grpc::channel_manager::ChannelConfig;
 use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
 use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHandler;
 use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
 use common_meta::heartbeat::handler::HandlerGroupExecutor;
 use common_telemetry::info;
-use common_telemetry::logging::TracingOptions;
+use common_telemetry::logging::{TracingOptions, DEFAULT_LOGGING_DIR};
 use common_time::timezone::set_default_timezone;
 use common_version::{short_version, version};
 use frontend::frontend::Frontend;
@@ -37,6 +39,7 @@ use frontend::heartbeat::HeartbeatTask;
 use frontend::instance::builder::FrontendBuilder;
 use frontend::server::Services;
 use meta_client::{MetaClientOptions, MetaClientType};
+use servers::addrs;
 use servers::export_metrics::ExportMetricsTask;
 use servers::tls::{TlsMode, TlsOption};
 use snafu::{OptionExt, ResultExt};
@@ -44,7 +47,7 @@ use tracing_appender::non_blocking::WorkerGuard;

 use crate::error::{self, Result};
 use crate::options::{GlobalOptions, GreptimeOptions};
-use crate::{log_versions, App};
+use crate::{create_resource_limit_metrics, log_versions, App};

 type FrontendOptions = GreptimeOptions<frontend::frontend::FrontendOptions>;

@@ -194,6 +197,14 @@ impl StartCommand {
            opts.logging.dir.clone_from(dir);
        }

+        // If the logging dir is not set, use the default logs dir under the default data home.
+        if opts.logging.dir.is_empty() {
+            opts.logging.dir = Path::new(DEFAULT_DATA_HOME)
+                .join(DEFAULT_LOGGING_DIR)
+                .to_string_lossy()
+                .to_string();
+        }
+
        if global_options.log_level.is_some() {
            opts.logging.level.clone_from(&global_options.log_level);
        }
@@ -270,7 +281,9 @@ impl StartCommand {
            opts.component.node_id.clone(),
            opts.component.slow_query.as_ref(),
        );
+
        log_versions(version(), short_version(), APP_NAME);
+        create_resource_limit_metrics(APP_NAME);

        info!("Frontend start command: {:#?}", self);
        info!("Frontend options: {:#?}", opts);
@@ -300,6 +313,7 @@ impl StartCommand {
            MetaClientType::Frontend,
            meta_client_options,
            Some(&plugins),
+            None,
        )
        .await
        .context(error::MetaClientInitSnafu)?;
@@ -331,11 +345,17 @@ impl StartCommand {

        let information_extension =
            Arc::new(DistributedInformationExtension::new(meta_client.clone()));
+
+        let process_manager = Arc::new(ProcessManager::new(
+            addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
+            Some(meta_client.clone()),
+        ));
        let catalog_manager = KvBackendCatalogManager::new(
            information_extension,
            cached_meta_backend.clone(),
            layered_cache_registry.clone(),
            None,
+            Some(process_manager.clone()),
        );

        let executor = HandlerGroupExecutor::new(vec![
@@ -353,12 +373,16 @@ impl StartCommand {

        // frontend to datanode need not timeout.
        // Some queries are expected to take long time.
-        let channel_config = ChannelConfig {
+        let mut channel_config = ChannelConfig {
            timeout: None,
            tcp_nodelay: opts.datanode.client.tcp_nodelay,
            connect_timeout: Some(opts.datanode.client.connect_timeout),
            ..Default::default()
        };
+        if opts.grpc.flight_compression.transport_compression() {
+            channel_config.accept_compression = true;
+            channel_config.send_compression = true;
+        }
        let client = NodeClients::new(channel_config);

        let instance = FrontendBuilder::new(
@@ -368,6 +392,7 @@ impl StartCommand {
            catalog_manager,
            Arc::new(client),
            meta_client,
+            process_manager,
        )
        .with_plugin(plugins.clone())
        .with_local_cache_invalidator(layered_cache_registry)
--- a/src/cmd/src/lib.rs
+++ b/src/cmd/src/lib.rs
@@ -16,6 +16,7 @@

 use async_trait::async_trait;
 use common_telemetry::{error, info};
+use stat::{get_cpu_limit, get_memory_limit};

 use crate::error::Result;

@@ -31,6 +32,12 @@ pub mod standalone;
 lazy_static::lazy_static! {
    static ref APP_VERSION: prometheus::IntGaugeVec =
        prometheus::register_int_gauge_vec!("greptime_app_version", "app version", &["version", "short_version", "app"]).unwrap();
+
+    static ref CPU_LIMIT: prometheus::IntGaugeVec =
+        prometheus::register_int_gauge_vec!("greptime_cpu_limit_in_millicores", "cpu limit in millicores", &["app"]).unwrap();
+
+    static ref MEMORY_LIMIT: prometheus::IntGaugeVec =
+        prometheus::register_int_gauge_vec!("greptime_memory_limit_in_bytes", "memory limit in bytes", &["app"]).unwrap();
 }

 /// wait for the close signal, for unix platform it's SIGINT or SIGTERM
@@ -114,6 +121,24 @@ pub fn log_versions(version: &str, short_version: &str, app: &str) {
    log_env_flags();
 }

+/// Records the CPU limit (in millicores) and the memory limit (in bytes) as gauges
+/// labelled with `app`, logging each value that gets recorded.
+///
+/// A limit for which `get_cpu_limit` / `get_memory_limit` returns `None` is skipped:
+/// nothing is logged and the corresponding gauge is left unset for `app`.
+pub fn create_resource_limit_metrics(app: &str) {
+    // Both limits are optional and handled independently of each other.
+    if let Some(cpu) = get_cpu_limit() {
+        info!("GreptimeDB start with cpu limit in millicores: {}", cpu);
+        CPU_LIMIT.with_label_values(&[app]).set(cpu);
+    }
+
+    if let Some(memory) = get_memory_limit() {
+        info!("GreptimeDB start with memory limit in bytes: {}", memory);
+        MEMORY_LIMIT.with_label_values(&[app]).set(memory);
+    }
+}
+
 fn log_env_flags() {
    info!("command line arguments");
    for argument in std::env::args() {
--- a/src/cmd/src/metasrv.rs
+++ b/src/cmd/src/metasrv.rs
@@ -13,6 +13,7 @@
 // limitations under the License.

 use std::fmt;
+use std::path::Path;
 use std::time::Duration;

 use async_trait::async_trait;
@@ -20,7 +21,7 @@ use clap::Parser;
 use common_base::Plugins;
 use common_config::Configurable;
 use common_telemetry::info;
-use common_telemetry::logging::TracingOptions;
+use common_telemetry::logging::{TracingOptions, DEFAULT_LOGGING_DIR};
 use common_version::{short_version, version};
 use meta_srv::bootstrap::MetasrvInstance;
 use meta_srv::metasrv::BackendImpl;
@@ -29,7 +30,7 @@ use tracing_appender::non_blocking::WorkerGuard;

 use crate::error::{self, LoadLayeredConfigSnafu, Result, StartMetaServerSnafu};
 use crate::options::{GlobalOptions, GreptimeOptions};
-use crate::{log_versions, App};
+use crate::{create_resource_limit_metrics, log_versions, App};

 type MetasrvOptions = GreptimeOptions<meta_srv::metasrv::MetasrvOptions>;

@@ -236,12 +237,20 @@ impl StartCommand {
            tokio_console_addr: global_options.tokio_console_addr.clone(),
        };

+        #[allow(deprecated)]
        if let Some(addr) = &self.rpc_bind_addr {
            opts.bind_addr.clone_from(addr);
+            opts.grpc.bind_addr.clone_from(addr);
+        } else if !opts.bind_addr.is_empty() {
+            opts.grpc.bind_addr.clone_from(&opts.bind_addr);
        }

+        #[allow(deprecated)]
        if let Some(addr) = &self.rpc_server_addr {
            opts.server_addr.clone_from(addr);
+            opts.grpc.server_addr.clone_from(addr);
+        } else if !opts.server_addr.is_empty() {
+            opts.grpc.server_addr.clone_from(&opts.server_addr);
        }

        if let Some(addrs) = &self.store_addrs {
@@ -274,6 +283,14 @@ impl StartCommand {
            opts.data_home.clone_from(data_home);
        }

+        // If the logging dir is not set, use the default logs dir in the data home.
+        if opts.logging.dir.is_empty() {
+            opts.logging.dir = Path::new(&opts.data_home)
+                .join(DEFAULT_LOGGING_DIR)
+                .to_string_lossy()
+                .to_string();
+        }
+
        if !self.store_key_prefix.is_empty() {
            opts.store_key_prefix.clone_from(&self.store_key_prefix)
        }
@@ -302,13 +319,15 @@ impl StartCommand {
            None,
            None,
        );
+
        log_versions(version(), short_version(), APP_NAME);
+        create_resource_limit_metrics(APP_NAME);

        info!("Metasrv start command: {:#?}", self);

        let plugin_opts = opts.plugins;
        let mut opts = opts.component;
-        opts.detect_server_addr();
+        opts.grpc.detect_server_addr();

        info!("Metasrv options: {:#?}", opts);

@@ -352,7 +371,7 @@ mod tests {
        };

        let options = cmd.load_options(&Default::default()).unwrap().component;
-        assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
+        assert_eq!("127.0.0.1:3002".to_string(), options.grpc.bind_addr);
        assert_eq!(vec!["127.0.0.1:2380".to_string()], options.store_addrs);
        assert_eq!(SelectorType::LoadBased, options.selector);
    }
@@ -385,8 +404,8 @@ mod tests {
        };

        let options = cmd.load_options(&Default::default()).unwrap().component;
-        assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
-        assert_eq!("127.0.0.1:3002".to_string(), options.server_addr);
+        assert_eq!("127.0.0.1:3002".to_string(), options.grpc.bind_addr);
+        assert_eq!("127.0.0.1:3002".to_string(), options.grpc.server_addr);
        assert_eq!(vec!["127.0.0.1:2379".to_string()], options.store_addrs);
        assert_eq!(SelectorType::LeaseBased, options.selector);
        assert_eq!("debug", options.logging.level.as_ref().unwrap());
@@ -498,10 +517,10 @@ mod tests {
                let opts = command.load_options(&Default::default()).unwrap().component;

                // Should be read from env, env > default values.
-                assert_eq!(opts.bind_addr, "127.0.0.1:14002");
+                assert_eq!(opts.grpc.bind_addr, "127.0.0.1:14002");

                // Should be read from config file, config file > env > default values.
-                assert_eq!(opts.server_addr, "127.0.0.1:3002");
+                assert_eq!(opts.grpc.server_addr, "127.0.0.1:3002");

                // Should be read from cli, cli > config file > env > default values.
                assert_eq!(opts.http.addr, "127.0.0.1:14000");
--- a/src/cmd/src/standalone.rs
+++ b/src/cmd/src/standalone.rs
@@ -13,6 +13,7 @@
 // limitations under the License.

 use std::net::SocketAddr;
+use std::path::Path;
 use std::sync::Arc;
 use std::{fs, path};

@@ -20,6 +21,7 @@ use async_trait::async_trait;
 use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
 use catalog::information_schema::InformationExtension;
 use catalog::kvbackend::KvBackendCatalogManager;
+use catalog::process_manager::ProcessManager;
 use clap::Parser;
 use client::api::v1::meta::RegionRole;
 use common_base::readable_size::ReadableSize;
@@ -28,18 +30,16 @@ use common_catalog::consts::{MIN_USER_FLOW_ID, MIN_USER_TABLE_ID};
 use common_config::{metadata_store_dir, Configurable, KvBackendConfig};
 use common_error::ext::BoxedError;
 use common_meta::cache::LayeredCacheRegistryBuilder;
-use common_meta::cache_invalidator::CacheInvalidatorRef;
 use common_meta::cluster::{NodeInfo, NodeStatus};
 use common_meta::datanode::RegionStat;
-use common_meta::ddl::flow_meta::{FlowMetadataAllocator, FlowMetadataAllocatorRef};
-use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef};
+use common_meta::ddl::flow_meta::FlowMetadataAllocator;
+use common_meta::ddl::table_meta::TableMetadataAllocator;
 use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl, ProcedureExecutorRef};
 use common_meta::ddl_manager::DdlManager;
 use common_meta::key::flow::flow_state::FlowStat;
-use common_meta::key::flow::{FlowMetadataManager, FlowMetadataManagerRef};
+use common_meta::key::flow::FlowMetadataManager;
 use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
 use common_meta::kv_backend::KvBackendRef;
-use common_meta::node_manager::NodeManagerRef;
 use common_meta::peer::Peer;
 use common_meta::region_keeper::MemoryRegionKeeper;
 use common_meta::region_registry::LeaderRegionRegistry;
@@ -47,7 +47,9 @@ use common_meta::sequence::SequenceBuilder;
 use common_meta::wal_options_allocator::{build_wal_options_allocator, WalOptionsAllocatorRef};
 use common_procedure::{ProcedureInfo, ProcedureManagerRef};
 use common_telemetry::info;
-use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, TracingOptions};
+use common_telemetry::logging::{
+    LoggingOptions, SlowQueryOptions, TracingOptions, DEFAULT_LOGGING_DIR,
+};
 use common_time::timezone::set_default_timezone;
 use common_version::{short_version, version};
 use common_wal::config::DatanodeWalConfig;
@@ -69,6 +71,7 @@ use frontend::service_config::{
 };
 use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
 use mito2::config::MitoConfig;
+use query::options::QueryOptions;
 use serde::{Deserialize, Serialize};
 use servers::export_metrics::{ExportMetricsOption, ExportMetricsTask};
 use servers::grpc::GrpcOptions;
@@ -80,7 +83,7 @@ use tracing_appender::non_blocking::WorkerGuard;

 use crate::error::{Result, StartFlownodeSnafu};
 use crate::options::{GlobalOptions, GreptimeOptions};
-use crate::{error, log_versions, App};
+use crate::{create_resource_limit_metrics, error, log_versions, App};

 pub const APP_NAME: &str = "greptime-standalone";

@@ -153,6 +156,7 @@ pub struct StandaloneOptions {
    pub init_regions_parallelism: usize,
    pub max_in_flight_write_bytes: Option<ReadableSize>,
    pub slow_query: Option<SlowQueryOptions>,
+    pub query: QueryOptions,
 }

 impl Default for StandaloneOptions {
@@ -185,6 +189,7 @@ impl Default for StandaloneOptions {
            init_regions_parallelism: 16,
            max_in_flight_write_bytes: None,
            slow_query: Some(SlowQueryOptions::default()),
+            query: QueryOptions::default(),
        }
    }
 }
@@ -240,6 +245,7 @@ impl StandaloneOptions {
            grpc: cloned_opts.grpc,
            init_regions_in_background: cloned_opts.init_regions_in_background,
            init_regions_parallelism: cloned_opts.init_regions_parallelism,
+            query: cloned_opts.query,
            ..Default::default()
        }
    }
@@ -251,15 +257,34 @@ pub struct Instance {
    flownode: FlownodeInstance,
    procedure_manager: ProcedureManagerRef,
    wal_options_allocator: WalOptionsAllocatorRef,
+
+    // The building blocks of the standalone instance, kept here so that
+    // extensions can be built on top of them.
+    #[cfg(feature = "enterprise")]
+    components: Components,
+
    // Keep the logging guard to prevent the worker from being dropped.
    _guard: Vec<WorkerGuard>,
 }

+#[cfg(feature = "enterprise")]
+pub struct Components {
+    pub plugins: Plugins, // plugins also given to the frontend builder
+    pub kv_backend: KvBackendRef, // KV backend also given to the catalog manager
+    pub frontend_client: Arc<FrontendClient>, // client also given to the flownode builder
+    pub catalog_manager: catalog::CatalogManagerRef, // also given to the frontend builder
+}
+
 impl Instance {
    /// Find the socket addr of a server by its `name`.
    pub fn server_addr(&self, name: &str) -> Option<SocketAddr> {
        self.frontend.server_handlers().addr(name)
    }
+
+    #[cfg(feature = "enterprise")] // the `components` field only exists with this feature
+    pub fn components(&self) -> &Components {
+        &self.components
+    }
 }

 #[async_trait]
@@ -401,6 +426,14 @@ impl StartCommand {
            opts.storage.data_home.clone_from(data_home);
        }

+        // If the logging dir is not set, use the default logs dir in the data home.
+        if opts.logging.dir.is_empty() {
+            opts.logging.dir = Path::new(&opts.storage.data_home)
+                .join(DEFAULT_LOGGING_DIR)
+                .to_string_lossy()
+                .to_string();
+        }
+
        if let Some(addr) = &self.rpc_bind_addr {
            // frontend grpc addr conflict with datanode default grpc addr
            let datanode_grpc_addr = DatanodeOptions::default().grpc.bind_addr;
@@ -451,7 +484,9 @@ impl StartCommand {
            None,
            opts.component.slow_query.as_ref(),
        );
+
        log_versions(version(), short_version(), APP_NAME);
+        create_resource_limit_metrics(APP_NAME);

        info!("Standalone start command: {:#?}", self);
        info!("Standalone options: {opts:#?}");
@@ -507,11 +542,14 @@ impl StartCommand {
            datanode.region_server(),
            procedure_manager.clone(),
        ));
+
+        let process_manager = Arc::new(ProcessManager::new(opts.grpc.server_addr.clone(), None));
        let catalog_manager = KvBackendCatalogManager::new(
            information_extension.clone(),
            kv_backend.clone(),
            layered_cache_registry.clone(),
            Some(procedure_manager.clone()),
+            Some(process_manager.clone()),
        );

        let table_metadata_manager =
@@ -527,13 +565,14 @@ impl StartCommand {
        // actually make a connection
        let (frontend_client, frontend_instance_handler) =
            FrontendClient::from_empty_grpc_handler();
+        let frontend_client = Arc::new(frontend_client);
        let flow_builder = FlownodeBuilder::new(
            flownode_options,
            plugins.clone(),
            table_metadata_manager.clone(),
            catalog_manager.clone(),
            flow_metadata_manager.clone(),
-            Arc::new(frontend_client.clone()),
+            frontend_client.clone(),
        );
        let flownode = flow_builder
            .build()
@@ -571,24 +610,36 @@ impl StartCommand {
            .await
            .context(error::BuildWalOptionsAllocatorSnafu)?;
        let wal_options_allocator = Arc::new(wal_options_allocator);
-        let table_meta_allocator = Arc::new(TableMetadataAllocator::new(
+        let table_metadata_allocator = Arc::new(TableMetadataAllocator::new(
            table_id_sequence,
            wal_options_allocator.clone(),
        ));
-        let flow_meta_allocator = Arc::new(FlowMetadataAllocator::with_noop_peer_allocator(
+        let flow_metadata_allocator = Arc::new(FlowMetadataAllocator::with_noop_peer_allocator(
            flow_id_sequence,
        ));

-        let ddl_task_executor = Self::create_ddl_task_executor(
-            procedure_manager.clone(),
-            node_manager.clone(),
-            layered_cache_registry.clone(),
-            table_metadata_manager,
-            table_meta_allocator,
-            flow_metadata_manager,
-            flow_meta_allocator,
-        )
-        .await?;
+        let ddl_context = DdlContext {
+            node_manager: node_manager.clone(),
+            cache_invalidator: layered_cache_registry.clone(),
+            memory_region_keeper: Arc::new(MemoryRegionKeeper::default()),
+            leader_region_registry: Arc::new(LeaderRegionRegistry::default()),
+            table_metadata_manager: table_metadata_manager.clone(),
+            table_metadata_allocator: table_metadata_allocator.clone(),
+            flow_metadata_manager: flow_metadata_manager.clone(),
+            flow_metadata_allocator: flow_metadata_allocator.clone(),
+            region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
+        };
+        let procedure_manager_c = procedure_manager.clone();
+
+        let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager_c, true)
+            .context(error::InitDdlManagerSnafu)?;
+        #[cfg(feature = "enterprise")]
+        let ddl_manager = {
+            let trigger_ddl_manager: Option<common_meta::ddl_manager::TriggerDdlManagerRef> =
+                plugins.get();
+            ddl_manager.with_trigger_ddl_manager(trigger_ddl_manager)
+        };
+        let ddl_task_executor: ProcedureExecutorRef = Arc::new(ddl_manager);

        let fe_instance = FrontendBuilder::new(
            fe_opts.clone(),
@@ -597,6 +648,7 @@ impl StartCommand {
            catalog_manager.clone(),
            node_manager.clone(),
            ddl_task_executor.clone(),
+            process_manager,
        )
        .with_plugin(plugins.clone())
        .try_build()
@@ -624,13 +676,13 @@ impl StartCommand {
            node_manager,
        )
        .await
-        .context(error::StartFlownodeSnafu)?;
+        .context(StartFlownodeSnafu)?;
        flow_streaming_engine.set_frontend_invoker(invoker).await;

        let export_metrics_task = ExportMetricsTask::try_new(&opts.export_metrics, Some(&plugins))
            .context(error::ServersSnafu)?;

-        let servers = Services::new(opts, fe_instance.clone(), plugins)
+        let servers = Services::new(opts, fe_instance.clone(), plugins.clone())
            .build()
            .context(error::StartFrontendSnafu)?;

@@ -641,47 +693,26 @@ impl StartCommand {
            export_metrics_task,
        };

+        #[cfg(feature = "enterprise")]
+        let components = Components {
+            plugins,
+            kv_backend,
+            frontend_client,
+            catalog_manager,
+        };
+
        Ok(Instance {
            datanode,
            frontend,
            flownode,
            procedure_manager,
            wal_options_allocator,
+            #[cfg(feature = "enterprise")]
+            components,
            _guard: guard,
        })
    }

-    pub async fn create_ddl_task_executor(
-        procedure_manager: ProcedureManagerRef,
-        node_manager: NodeManagerRef,
-        cache_invalidator: CacheInvalidatorRef,
-        table_metadata_manager: TableMetadataManagerRef,
-        table_metadata_allocator: TableMetadataAllocatorRef,
-        flow_metadata_manager: FlowMetadataManagerRef,
-        flow_metadata_allocator: FlowMetadataAllocatorRef,
-    ) -> Result<ProcedureExecutorRef> {
-        let procedure_executor: ProcedureExecutorRef = Arc::new(
-            DdlManager::try_new(
-                DdlContext {
-                    node_manager,
-                    cache_invalidator,
-                    memory_region_keeper: Arc::new(MemoryRegionKeeper::default()),
-                    leader_region_registry: Arc::new(LeaderRegionRegistry::default()),
-                    table_metadata_manager,
-                    table_metadata_allocator,
-                    flow_metadata_manager,
-                    flow_metadata_allocator,
-                    region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
-                },
-                procedure_manager,
-                true,
-            )
-            .context(error::InitDdlManagerSnafu)?,
-        );
-
-        Ok(procedure_executor)
-    }
-
    pub async fn create_table_metadata_manager(
        kv_backend: KvBackendRef,
    ) -> Result<TableMetadataManagerRef> {
@@ -817,7 +848,7 @@ mod tests {
    use common_config::ENV_VAR_SEP;
    use common_test_util::temp_dir::create_named_temp_file;
    use common_wal::config::DatanodeWalConfig;
-    use datanode::config::{FileConfig, GcsConfig};
+    use object_store::config::{FileConfig, GcsConfig};

    use super::*;
    use crate::options::GlobalOptions;
@@ -936,15 +967,15 @@ mod tests {

        assert!(matches!(
            &dn_opts.storage.store,
-            datanode::config::ObjectStoreConfig::File(FileConfig { .. })
+            object_store::config::ObjectStoreConfig::File(FileConfig { .. })
        ));
        assert_eq!(dn_opts.storage.providers.len(), 2);
        assert!(matches!(
            dn_opts.storage.providers[0],
-            datanode::config::ObjectStoreConfig::Gcs(GcsConfig { .. })
+            object_store::config::ObjectStoreConfig::Gcs(GcsConfig { .. })
        ));
        match &dn_opts.storage.providers[1] {
-            datanode::config::ObjectStoreConfig::S3(s3_config) => {
+            object_store::config::ObjectStoreConfig::S3(s3_config) => {
                assert_eq!(
                    "SecretBox<alloc::string::String>([REDACTED])".to_string(),
                    format!("{:?}", s3_config.access_key_id)
--- a/src/cmd/tests/load_config_test.rs
+++ b/src/cmd/tests/load_config_test.rs
@@ -16,9 +16,9 @@ use std::time::Duration;

 use cmd::options::GreptimeOptions;
 use cmd::standalone::StandaloneOptions;
-use common_config::Configurable;
+use common_config::{Configurable, DEFAULT_DATA_HOME};
 use common_options::datanode::{ClientOptions, DatanodeClientOptions};
-use common_telemetry::logging::{LoggingOptions, DEFAULT_OTLP_ENDPOINT};
+use common_telemetry::logging::{LoggingOptions, DEFAULT_LOGGING_DIR, DEFAULT_OTLP_HTTP_ENDPOINT};
 use common_wal::config::raft_engine::RaftEngineConfig;
 use common_wal::config::DatanodeWalConfig;
 use datanode::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
@@ -32,6 +32,7 @@ use mito2::config::MitoConfig;
 use servers::export_metrics::ExportMetricsOption;
 use servers::grpc::GrpcOptions;
 use servers::http::HttpOptions;
+use store_api::path_utils::WAL_DIR;

 #[allow(deprecated)]
 #[test]
@@ -56,13 +57,13 @@ fn test_load_datanode_example_config() {
                metadata_cache_tti: Duration::from_secs(300),
            }),
            wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig {
-                dir: Some("./greptimedb_data/wal".to_string()),
+                dir: Some(format!("{}/{}", DEFAULT_DATA_HOME, WAL_DIR)),
                sync_period: Some(Duration::from_secs(10)),
                recovery_parallelism: 2,
                ..Default::default()
            }),
            storage: StorageConfig {
-                data_home: "./greptimedb_data/".to_string(),
+                data_home: DEFAULT_DATA_HOME.to_string(),
                ..Default::default()
            },
            region_engine: vec![
@@ -79,12 +80,13 @@ fn test_load_datanode_example_config() {
            ],
            logging: LoggingOptions {
                level: Some("info".to_string()),
-                otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
+                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
+                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
                tracing_sample_ratio: Some(Default::default()),
                ..Default::default()
            },
            export_metrics: ExportMetricsOption {
-                self_import: Some(Default::default()),
+                self_import: None,
                remote_write: Some(Default::default()),
                ..Default::default()
            },
@@ -121,7 +123,8 @@ fn test_load_frontend_example_config() {
            }),
            logging: LoggingOptions {
                level: Some("info".to_string()),
-                otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
+                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
+                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
                tracing_sample_ratio: Some(Default::default()),
                ..Default::default()
            },
@@ -133,7 +136,7 @@ fn test_load_frontend_example_config() {
                },
            },
            export_metrics: ExportMetricsOption {
-                self_import: Some(Default::default()),
+                self_import: None,
                remote_write: Some(Default::default()),
                ..Default::default()
            },
@@ -160,12 +163,16 @@ fn test_load_metasrv_example_config() {
    let expected = GreptimeOptions::<MetasrvOptions> {
        component: MetasrvOptions {
            selector: SelectorType::default(),
-            data_home: "./greptimedb_data/metasrv/".to_string(),
-            server_addr: "127.0.0.1:3002".to_string(),
+            data_home: DEFAULT_DATA_HOME.to_string(),
+            grpc: GrpcOptions {
+                bind_addr: "127.0.0.1:3002".to_string(),
+                server_addr: "127.0.0.1:3002".to_string(),
+                ..Default::default()
+            },
            logging: LoggingOptions {
-                dir: "./greptimedb_data/logs".to_string(),
+                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
                level: Some("info".to_string()),
-                otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
+                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
                tracing_sample_ratio: Some(Default::default()),
                ..Default::default()
            },
@@ -177,7 +184,7 @@ fn test_load_metasrv_example_config() {
                },
            },
            export_metrics: ExportMetricsOption {
-                self_import: Some(Default::default()),
+                self_import: None,
                remote_write: Some(Default::default()),
                ..Default::default()
            },
@@ -198,7 +205,7 @@ fn test_load_standalone_example_config() {
        component: StandaloneOptions {
            default_timezone: Some("UTC".to_string()),
            wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig {
-                dir: Some("./greptimedb_data/wal".to_string()),
+                dir: Some(format!("{}/{}", DEFAULT_DATA_HOME, WAL_DIR)),
                sync_period: Some(Duration::from_secs(10)),
                recovery_parallelism: 2,
                ..Default::default()
@@ -216,12 +223,13 @@ fn test_load_standalone_example_config() {
                }),
            ],
            storage: StorageConfig {
-                data_home: "./greptimedb_data/".to_string(),
+                data_home: DEFAULT_DATA_HOME.to_string(),
                ..Default::default()
            },
            logging: LoggingOptions {
                level: Some("info".to_string()),
-                otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
+                dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
+                otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
                tracing_sample_ratio: Some(Default::default()),
                ..Default::default()
            },
--- a/src/common/base/src/cancellation.rs
+++ b/src/common/base/src/cancellation.rs
@@ -0,0 +1,240 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! [CancellationHandle] is used to compose with manual implementation of [futures::future::Future]
+//! or [futures::stream::Stream] to facilitate cancellation.
+//! See example in [frontend::stream_wrapper::CancellableStreamWrapper] and [CancellableFuture].
+
+use std::fmt::{Debug, Display, Formatter};
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
+use std::task::{Context, Poll};
+
+use futures::task::AtomicWaker;
+use pin_project::pin_project;
+
+/// Shared cancellation flag paired with an [AtomicWaker].
+///
+/// [CancellationHandle::cancel] flips the flag and wakes whatever waker is
+/// currently registered on `waker`.
+#[derive(Default)]
+pub struct CancellationHandle {
+    /// Waker that is woken by [CancellationHandle::cancel].
+    waker: AtomicWaker,
+    /// `true` once [CancellationHandle::cancel] has been called.
+    cancelled: AtomicBool,
+}
+
+impl Debug for CancellationHandle {
+    /// Prints only the `cancelled` flag; the waker field is left out of the output.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("CancellationHandle")
+            .field("cancelled", &self.is_cancelled())
+            .finish()
+    }
+}
+
+impl CancellationHandle {
+    /// Returns the waker that [CancellationHandle::cancel] wakes.
+    ///
+    /// Futures and streams register their task waker here while they are pending.
+    pub fn waker(&self) -> &AtomicWaker {
+        &self.waker
+    }
+
+    /// Cancels a future or stream.
+    ///
+    /// Only the call that actually flips the flag wakes the registered waker;
+    /// subsequent calls are no-ops.
+    pub fn cancel(&self) {
+        // `AcqRel` makes the successful store a release operation, so it pairs with
+        // the acquire load in `is_cancelled`: anything written before `cancel()` is
+        // visible to a task that observes the flag as set.
+        if self
+            .cancelled
+            .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
+            .is_ok()
+        {
+            self.waker.wake();
+        }
+    }
+
+    /// Is this handle cancelled.
+    pub fn is_cancelled(&self) -> bool {
+        // Acquire load: pairs with the release half of the store in `cancel`.
+        self.cancelled.load(Ordering::Acquire)
+    }
+}
+
+/// Future adapter that resolves to `Err(Cancelled)` once its [CancellationHandle]
+/// is cancelled, and to `Ok(output)` if the wrapped future finishes first.
+#[pin_project]
+#[derive(Debug, Clone)]
+pub struct CancellableFuture<T> {
+    /// The wrapped future.
+    #[pin]
+    fut: T,
+    /// Handle checked on every poll to detect cancellation.
+    handle: Arc<CancellationHandle>,
+}
+
+impl<T> CancellableFuture<T> {
+    /// Wraps `fut` so that it can be cancelled through `handle`.
+    pub fn new(fut: T, handle: Arc<CancellationHandle>) -> Self {
+        Self { fut, handle }
+    }
+}
+
+impl<T> Future for CancellableFuture<T>
+where
+    T: Future,
+{
+    type Output = Result<T::Output, Cancelled>;
+
+    /// Polls the wrapped future, yielding `Err(Cancelled)` as soon as the handle
+    /// reports cancellation and `Ok(output)` when the wrapped future completes.
+    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        let projected = self.project();
+
+        // Bail out before touching the inner future if cancellation was already requested.
+        if projected.handle.is_cancelled() {
+            return Poll::Ready(Err(Cancelled));
+        }
+
+        match projected.fut.poll(cx) {
+            Poll::Ready(output) => Poll::Ready(Ok(output)),
+            Poll::Pending => {
+                // Register first and re-check afterwards, so a `cancel` racing with
+                // this poll is either observed here or wakes the registered waker.
+                projected.handle.waker().register(cx.waker());
+                if projected.handle.is_cancelled() {
+                    Poll::Ready(Err(Cancelled))
+                } else {
+                    Poll::Pending
+                }
+            }
+        }
+    }
+}
+
+/// Error value produced by [CancellableFuture] when its handle has been cancelled.
+#[derive(Copy, Clone, Debug)]
+pub struct Cancelled;
+
+impl Display for Cancelled {
+    /// Writes the fixed message `Future has been cancelled`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_str("Future has been cancelled")
+    }
+}
+
+// Tests for `CancellationHandle` and `CancellableFuture`: normal completion,
+// cancellation before and during execution, a handle shared by several futures,
+// and the `Display` text of `Cancelled`.
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+    use std::time::Duration;
+
+    use tokio::time::{sleep, timeout};
+
+    use crate::cancellation::{CancellableFuture, CancellationHandle, Cancelled};
+
+    // A never-cancelled handle lets the wrapped future's output through as `Ok`.
+    #[tokio::test]
+    async fn test_cancellable_future_completes_normally() {
+        let handle = Arc::new(CancellationHandle::default());
+        let future = async { 42 };
+        let cancellable = CancellableFuture::new(future, handle);
+
+        let result = cancellable.await;
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), 42);
+    }
+
+    // Cancelling before the first poll yields `Err(Cancelled)` even for a ready future.
+    #[tokio::test]
+    async fn test_cancellable_future_cancelled_before_start() {
+        let handle = Arc::new(CancellationHandle::default());
+        handle.cancel();
+
+        let future = async { 42 };
+        let cancellable = CancellableFuture::new(future, handle);
+
+        let result = cancellable.await;
+        assert!(result.is_err());
+        assert!(matches!(result.unwrap_err(), Cancelled));
+    }
+
+    // Cancelling from another task while the wrapped future is pending wakes it up.
+    #[tokio::test]
+    async fn test_cancellable_future_cancelled_during_execution() {
+        let handle = Arc::new(CancellationHandle::default());
+        let handle_clone = handle.clone();
+
+        // Create a future that sleeps for a long time
+        let future = async {
+            sleep(Duration::from_secs(10)).await;
+            42
+        };
+        let cancellable = CancellableFuture::new(future, handle);
+
+        // Cancel the future after a short delay
+        tokio::spawn(async move {
+            sleep(Duration::from_millis(50)).await;
+            handle_clone.cancel();
+        });
+
+        let result = cancellable.await;
+        assert!(result.is_err());
+        assert!(matches!(result.unwrap_err(), Cancelled));
+    }
+
+    // A cancellation that arrives after completion does not affect the result.
+    #[tokio::test]
+    async fn test_cancellable_future_completes_before_cancellation() {
+        let handle = Arc::new(CancellationHandle::default());
+        let handle_clone = handle.clone();
+
+        // Create a future that completes quickly
+        let future = async {
+            sleep(Duration::from_millis(10)).await;
+            42
+        };
+        let cancellable = CancellableFuture::new(future, handle);
+
+        // Try to cancel after the future should have completed
+        tokio::spawn(async move {
+            sleep(Duration::from_millis(100)).await;
+            handle_clone.cancel();
+        });
+
+        let result = cancellable.await;
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), 42);
+    }
+
+    // `is_cancelled` flips from `false` to `true` after `cancel`.
+    #[tokio::test]
+    async fn test_cancellation_handle_is_cancelled() {
+        let handle = CancellationHandle::default();
+        assert!(!handle.is_cancelled());
+
+        handle.cancel();
+        assert!(handle.is_cancelled());
+    }
+
+    // One handle cancels every future that was built with it.
+    #[tokio::test]
+    async fn test_multiple_cancellable_futures_with_same_handle() {
+        let handle = Arc::new(CancellationHandle::default());
+
+        let future1 = CancellableFuture::new(async { 1 }, handle.clone());
+        let future2 = CancellableFuture::new(async { 2 }, handle.clone());
+
+        // Cancel before starting
+        handle.cancel();
+
+        let (result1, result2) = tokio::join!(future1, future2);
+
+        assert!(result1.is_err());
+        assert!(result2.is_err());
+        assert!(matches!(result1.unwrap_err(), Cancelled));
+        assert!(matches!(result2.unwrap_err(), Cancelled));
+    }
+
+    // The wrapper composes with `tokio::time::timeout`; here the timeout fires first.
+    #[tokio::test]
+    async fn test_cancellable_future_with_timeout() {
+        let handle = Arc::new(CancellationHandle::default());
+        let future = async {
+            sleep(Duration::from_secs(1)).await;
+            42
+        };
+        let cancellable = CancellableFuture::new(future, handle.clone());
+
+        // Use timeout to ensure the test doesn't hang
+        let result = timeout(Duration::from_millis(100), cancellable).await;
+
+        // Should timeout because the future takes 1 second but we timeout after 100ms
+        assert!(result.is_err());
+    }
+
+    // Pins the user-visible message of `Cancelled`.
+    #[tokio::test]
+    async fn test_cancelled_display() {
+        let cancelled = Cancelled;
+        assert_eq!(format!("{}", cancelled), "Future has been cancelled");
+    }
+}
--- a/src/common/base/src/lib.rs
+++ b/src/common/base/src/lib.rs
@@ -14,6 +14,7 @@

 pub mod bit_vec;
 pub mod bytes;
+pub mod cancellation;
 pub mod plugins;
 pub mod range_read;
 #[allow(clippy::all)]
--- a/src/common/catalog/src/consts.rs
+++ b/src/common/catalog/src/consts.rs
@@ -102,6 +102,8 @@ pub const INFORMATION_SCHEMA_FLOW_TABLE_ID: u32 = 33;
 pub const INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID: u32 = 34;
 /// id for information_schema.region_statistics
 pub const INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID: u32 = 35;
+/// id for information_schema.process_list
+pub const INFORMATION_SCHEMA_PROCESS_LIST_TABLE_ID: u32 = 36;

 // ----- End of information_schema tables -----

--- a/src/common/config/Cargo.toml
+++ b/src/common/config/Cargo.toml
@@ -14,6 +14,7 @@ common-macro.workspace = true
 config.workspace = true
 humantime-serde.workspace = true
 num_cpus.workspace = true
+object-store.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 serde_with.workspace = true
--- a/src/common/config/src/config.rs
+++ b/src/common/config/src/config.rs
@@ -106,7 +106,7 @@ mod tests {
    use common_telemetry::logging::LoggingOptions;
    use common_test_util::temp_dir::create_named_temp_file;
    use common_wal::config::DatanodeWalConfig;
-    use datanode::config::{ObjectStoreConfig, StorageConfig};
+    use datanode::config::StorageConfig;
    use meta_client::MetaClientOptions;
    use serde::{Deserialize, Serialize};

@@ -212,7 +212,7 @@ mod tests {

                // Check the configs from environment variables.
                match &opts.storage.store {
-                    ObjectStoreConfig::S3(s3_config) => {
+                    object_store::config::ObjectStoreConfig::S3(s3_config) => {
                        assert_eq!(s3_config.bucket, "mybucket".to_string());
                    }
                    _ => panic!("unexpected store type"),
--- a/src/common/config/src/lib.rs
+++ b/src/common/config/src/lib.rs
@@ -26,6 +26,9 @@ pub fn metadata_store_dir(store_dir: &str) -> String {
    format!("{store_dir}/metadata")
 }

+/// The default data home directory.
+pub const DEFAULT_DATA_HOME: &str = "./greptimedb_data";
+
 #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(default)]
 pub struct KvBackendConfig {
--- a/src/common/datasource/src/lib.rs
+++ b/src/common/datasource/src/lib.rs
@@ -21,6 +21,7 @@ pub mod error;
 pub mod file_format;
 pub mod lister;
 pub mod object_store;
+pub mod parquet_writer;
 pub mod share_buffer;
 #[cfg(test)]
 pub mod test_util;
--- a/src/common/datasource/src/object_store.rs
+++ b/src/common/datasource/src/object_store.rs
@@ -13,7 +13,9 @@
 // limitations under the License.

 pub mod fs;
+pub mod oss;
 pub mod s3;
+
 use std::collections::HashMap;

 use lazy_static::lazy_static;
@@ -25,10 +27,12 @@ use url::{ParseError, Url};
 use self::fs::build_fs_backend;
 use self::s3::build_s3_backend;
 use crate::error::{self, Result};
+use crate::object_store::oss::build_oss_backend;
 use crate::util::find_dir_and_filename;

 pub const FS_SCHEMA: &str = "FS";
 pub const S3_SCHEMA: &str = "S3";
+pub const OSS_SCHEMA: &str = "OSS";

 /// Returns `(schema, Option<host>, path)`
 pub fn parse_url(url: &str) -> Result<(String, Option<String>, String)> {
@@ -64,6 +68,12 @@ pub fn build_backend(url: &str, connection: &HashMap<String, String>) -> Result<
            })?;
            Ok(build_s3_backend(&host, &root, connection)?)
        }
+        OSS_SCHEMA => {
+            let host = host.context(error::EmptyHostPathSnafu {
+                url: url.to_string(),
+            })?;
+            Ok(build_oss_backend(&host, &root, connection)?)
+        }
        FS_SCHEMA => Ok(build_fs_backend(&root)?),

        _ => error::UnsupportedBackendProtocolSnafu {
--- a/src/common/datasource/src/object_store/oss.rs
+++ b/src/common/datasource/src/object_store/oss.rs
@@ -0,0 +1,118 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use object_store::services::Oss;
+use object_store::ObjectStore;
+use snafu::ResultExt;
+
+use crate::error::{self, Result};
+
+// Option keys accepted by `is_supported_in_oss`. `build_oss_backend` reads
+// `endpoint`, `access_key_id`, `access_key_secret` and `allow_anonymous` from the
+// connection map; bucket and root are passed to it as arguments instead.
+const BUCKET: &str = "bucket";
+const ENDPOINT: &str = "endpoint";
+const ACCESS_KEY_ID: &str = "access_key_id";
+const ACCESS_KEY_SECRET: &str = "access_key_secret";
+const ROOT: &str = "root";
+const ALLOW_ANONYMOUS: &str = "allow_anonymous";
+
+/// Returns `true` when `key` is one of the option names recognized for OSS.
+///
+/// The comparison is exact and case-sensitive.
+pub fn is_supported_in_oss(key: &str) -> bool {
+    matches!(
+        key,
+        ROOT | ALLOW_ANONYMOUS | BUCKET | ENDPOINT | ACCESS_KEY_ID | ACCESS_KEY_SECRET
+    )
+}
+
+/// Build an OSS backend using the provided bucket, root, and connection parameters.
+///
+/// `endpoint`, `access_key_id`, `access_key_secret` and `allow_anonymous` are taken
+/// from `connection` when present. An `allow_anonymous` value that does not parse as
+/// a `bool` is reported as an `InvalidConnection` error.
+pub fn build_oss_backend(
+    bucket: &str,
+    root: &str,
+    connection: &HashMap<String, String>,
+) -> Result<ObjectStore> {
+    let mut oss = Oss::default().bucket(bucket).root(root);
+
+    if let Some(value) = connection.get(ENDPOINT) {
+        oss = oss.endpoint(value);
+    }
+    if let Some(value) = connection.get(ACCESS_KEY_ID) {
+        oss = oss.access_key_id(value);
+    }
+    if let Some(value) = connection.get(ACCESS_KEY_SECRET) {
+        oss = oss.access_key_secret(value);
+    }
+
+    // Anonymous access stays off unless the option is present and parses to `true`.
+    let anonymous = connection
+        .get(ALLOW_ANONYMOUS)
+        .map(|raw| {
+            raw.parse::<bool>().map_err(|e| {
+                error::InvalidConnectionSnafu {
+                    msg: format!(
+                        "failed to parse the option {}={}, {}",
+                        ALLOW_ANONYMOUS, raw, e
+                    ),
+                }
+                .build()
+            })
+        })
+        .transpose()?
+        .unwrap_or(false);
+    if anonymous {
+        oss = oss.allow_anonymous();
+    }
+
+    // Wrap the raw operator with logging, tracing and metrics layers.
+    let layered = ObjectStore::new(oss)
+        .context(error::BuildBackendSnafu)?
+        .layer(object_store::layers::LoggingLayer::default())
+        .layer(object_store::layers::TracingLayer)
+        .layer(object_store::layers::build_prometheus_metrics_layer(true))
+        .finish();
+
+    Ok(layered)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Every declared option key is accepted; unknown keys are rejected.
+    #[test]
+    fn test_is_supported_in_oss() {
+        assert!(is_supported_in_oss(ROOT));
+        assert!(is_supported_in_oss(ALLOW_ANONYMOUS));
+        assert!(is_supported_in_oss(BUCKET));
+        assert!(is_supported_in_oss(ENDPOINT));
+        assert!(is_supported_in_oss(ACCESS_KEY_ID));
+        assert!(is_supported_in_oss(ACCESS_KEY_SECRET));
+        assert!(!is_supported_in_oss("foo"));
+        assert!(!is_supported_in_oss("BAR"));
+    }
+
+    // Building with every optional connection field set succeeds.
+    #[test]
+    fn test_build_oss_backend_all_fields_valid() {
+        let mut connection = HashMap::new();
+        connection.insert(
+            ENDPOINT.to_string(),
+            "http://oss-ap-southeast-1.aliyuncs.com".to_string(),
+        );
+        connection.insert(ACCESS_KEY_ID.to_string(), "key_id".to_string());
+        connection.insert(ACCESS_KEY_SECRET.to_string(), "key_secret".to_string());
+        connection.insert(ALLOW_ANONYMOUS.to_string(), "true".to_string());
+
+        let result = build_oss_backend("my-bucket", "my-root", &connection);
+        assert!(result.is_ok());
+    }
+}
--- a/src/common/datasource/src/parquet_writer.rs
+++ b/src/common/datasource/src/parquet_writer.rs
@@ -0,0 +1,52 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use bytes::Bytes;
+use futures::future::BoxFuture;
+use object_store::Writer;
+use parquet::arrow::async_writer::AsyncFileWriter;
+use parquet::errors::ParquetError;
+
+/// Bridges opendal [Writer] with parquet [AsyncFileWriter].
+pub struct AsyncWriter {
+    /// The wrapped writer that receives every byte chunk.
+    inner: Writer,
+}
+
+impl AsyncWriter {
+    /// Create a [`AsyncWriter`] by given [`Writer`].
+    pub fn new(writer: Writer) -> Self {
+        Self { inner: writer }
+    }
+}
+
+impl AsyncFileWriter for AsyncWriter {
+    /// Forwards `bs` to the wrapped [Writer]; a failure is boxed into
+    /// [ParquetError::External].
+    fn write(&mut self, bs: Bytes) -> BoxFuture<'_, parquet::errors::Result<()>> {
+        let fut = async move {
+            match self.inner.write(bs).await {
+                Ok(done) => Ok(done),
+                Err(err) => Err(ParquetError::External(Box::new(err))),
+            }
+        };
+        Box::pin(fut)
+    }
+
+    /// Closes the wrapped [Writer], discarding whatever it returns on success; a
+    /// failure is boxed into [ParquetError::External].
+    fn complete(&mut self) -> BoxFuture<'_, parquet::errors::Result<()>> {
+        let fut = async move {
+            match self.inner.close().await {
+                Ok(_) => Ok(()),
+                Err(err) => Err(ParquetError::External(Box::new(err))),
+            }
+        };
+        Box::pin(fut)
+    }
+}
--- a/src/common/frontend/Cargo.toml
+++ b/src/common/frontend/Cargo.toml
@@ -7,5 +7,13 @@ license.workspace = true
 [dependencies]
 async-trait.workspace = true
 common-error.workspace = true
+common-grpc.workspace = true
 common-macro.workspace = true
+common-meta.workspace = true
+greptime-proto.workspace = true
+meta-client.workspace = true
 snafu.workspace = true
+tonic.workspace = true
+
+[dev-dependencies]
+tokio.workspace = true
--- a/src/common/frontend/src/error.rs
+++ b/src/common/frontend/src/error.rs
@@ -27,6 +27,35 @@ pub enum Error {
        location: Location,
        source: BoxedError,
    },
+
+    #[snafu(display("Failed to list nodes from metasrv"))]
+    Meta {
+        source: Box<meta_client::error::Error>,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Failed to parse process id: {}", s))]
+    ParseProcessId {
+        s: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Failed to invoke frontend service"))]
+    InvokeFrontend {
+        #[snafu(source)]
+        error: tonic::Status,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    // Raised when a gRPC channel to a frontend node cannot be obtained.
+    #[snafu(display("Failed to create channel to frontend"))]
+    CreateChannel {
+        source: common_grpc::error::Error,
+        #[snafu(implicit)]
+        location: Location,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -36,6 +65,10 @@ impl ErrorExt for Error {
        use Error::*;
        match self {
            External { source, .. } => source.status_code(),
+            Meta { source, .. } => source.status_code(),
+            ParseProcessId { .. } => StatusCode::InvalidArguments,
+            InvokeFrontend { .. } => StatusCode::Unexpected,
+            CreateChannel { source, .. } => source.status_code(),
        }
    }

--- a/src/common/frontend/src/lib.rs
+++ b/src/common/frontend/src/lib.rs
@@ -12,4 +12,41 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::fmt::{Display, Formatter};
+use std::str::FromStr;
+
+use snafu::OptionExt;
+
 pub mod error;
+pub mod selector;
+
+/// Process identifier that is rendered as `<server_addr>/<id>`.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct DisplayProcessId {
+    /// Address part, printed before the `/`.
+    pub server_addr: String,
+    /// Numeric part, printed after the `/`.
+    pub id: u32,
+}
+
+impl Display for DisplayProcessId {
+    /// Writes `server_addr`, a `/`, then `id`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}/{}", self.server_addr, self.id)
+    }
+}
+
+impl TryFrom<&str> for DisplayProcessId {
+    type Error = error::Error;
+
+    /// Parses the `<server_addr>/<id>` form produced by the `Display` impl.
+    ///
+    /// The input is split at the *last* `/`, so the whole string is consumed and a
+    /// `server_addr` that itself contains `/` still round-trips through `Display`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ParseProcessId` when `value` has no `/` or when the part after the
+    /// last `/` is not a valid `u32`.
+    fn try_from(value: &str) -> Result<Self, Self::Error> {
+        let (server_addr, id) = value
+            .rsplit_once('/')
+            .context(error::ParseProcessIdSnafu { s: value })?;
+        let id = u32::from_str(id)
+            .ok()
+            .context(error::ParseProcessIdSnafu { s: value })?;
+        Ok(DisplayProcessId {
+            server_addr: server_addr.to_string(),
+            id,
+        })
+    }
+}
--- a/src/common/frontend/src/selector.rs
+++ b/src/common/frontend/src/selector.rs
@@ -0,0 +1,112 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::time::Duration;
+
+use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
+use common_meta::cluster::{ClusterInfo, NodeInfo, Role};
+use greptime_proto::v1::frontend::{
+    frontend_client, KillProcessRequest, KillProcessResponse, ListProcessRequest,
+    ListProcessResponse,
+};
+use meta_client::MetaClientRef;
+use snafu::ResultExt;
+use tonic::Response;
+
+use crate::error;
+use crate::error::{MetaSnafu, Result};
+
+/// Boxed, dynamically dispatched [FrontendClient].
+pub type FrontendClientPtr = Box<dyn FrontendClient>;
+
+/// Client-side abstraction over the frontend process-management calls.
+#[async_trait::async_trait]
+pub trait FrontendClient: Send {
+    /// Sends a [ListProcessRequest] and returns the [ListProcessResponse].
+    async fn list_process(&mut self, req: ListProcessRequest) -> Result<ListProcessResponse>;
+
+    /// Sends a [KillProcessRequest] and returns the [KillProcessResponse].
+    async fn kill_process(&mut self, req: KillProcessRequest) -> Result<KillProcessResponse>;
+}
+
+#[async_trait::async_trait]
+impl FrontendClient for frontend_client::FrontendClient<tonic::transport::channel::Channel> {
+    /// Forwards to the gRPC client's `list_process` and unwraps the tonic response.
+    async fn list_process(&mut self, req: ListProcessRequest) -> Result<ListProcessResponse> {
+        // The call is spelled with the concrete client type rather than `self.list_process`
+        // — presumably to reach the generated client method instead of this trait method.
+        let response: Response<ListProcessResponse> =
+            frontend_client::FrontendClient::<tonic::transport::channel::Channel>::list_process(
+                self, req,
+            )
+            .await
+            .context(error::InvokeFrontendSnafu)?;
+        Ok(response.into_inner())
+    }
+
+    /// Forwards to the gRPC client's `kill_process` and unwraps the tonic response.
+    async fn kill_process(&mut self, req: KillProcessRequest) -> Result<KillProcessResponse> {
+        let response: Response<KillProcessResponse> =
+            frontend_client::FrontendClient::<tonic::transport::channel::Channel>::kill_process(
+                self, req,
+            )
+            .await
+            .context(error::InvokeFrontendSnafu)?;
+        Ok(response.into_inner())
+    }
+}
+
+/// Produces clients for the frontend nodes accepted by a predicate.
+#[async_trait::async_trait]
+pub trait FrontendSelector {
+    /// Returns one [FrontendClientPtr] per node for which `predicate` returns `true`.
+    async fn select<F>(&self, predicate: F) -> Result<Vec<FrontendClientPtr>>
+    where
+        F: Fn(&NodeInfo) -> bool + Send;
+}
+
+/// [FrontendSelector] that lists frontend nodes through the meta client.
+#[derive(Debug, Clone)]
+pub struct MetaClientSelector {
+    /// Used to list the nodes with the frontend role.
+    meta_client: MetaClientRef,
+    /// Supplies the gRPC channel for each selected node address.
+    channel_manager: ChannelManager,
+}
+
+#[async_trait::async_trait]
+impl FrontendSelector for MetaClientSelector {
+    /// Lists the frontend nodes via the meta client and builds a client for each node
+    /// accepted by `predicate`.
+    ///
+    /// Fails with `Meta` when listing nodes fails, and with `CreateChannel` on the
+    /// first node whose channel cannot be obtained.
+    async fn select<F>(&self, predicate: F) -> Result<Vec<FrontendClientPtr>>
+    where
+        F: Fn(&NodeInfo) -> bool + Send,
+    {
+        let frontends = self
+            .meta_client
+            .list_nodes(Some(Role::Frontend))
+            .await
+            .map_err(Box::new)
+            .context(MetaSnafu)?;
+
+        let mut clients: Vec<FrontendClientPtr> = Vec::new();
+        for node in frontends {
+            if !predicate(&node) {
+                continue;
+            }
+            let channel = self
+                .channel_manager
+                .get(node.peer.addr)
+                .context(error::CreateChannelSnafu)?;
+            clients.push(Box::new(frontend_client::FrontendClient::new(channel)));
+        }
+        Ok(clients)
+    }
+}
+
+impl MetaClientSelector {
+    /// Creates a selector whose channels use a 30-second connect timeout and a
+    /// 30-second request timeout.
+    pub fn new(meta_client: MetaClientRef) -> Self {
+        let thirty_seconds = Duration::from_secs(30);
+        let channel_config = ChannelConfig::new()
+            .connect_timeout(thirty_seconds)
+            .timeout(thirty_seconds);
+        Self {
+            meta_client,
+            channel_manager: ChannelManager::with_config(channel_config),
+        }
+    }
+}
--- a/src/common/function/Cargo.toml
+++ b/src/common/function/Cargo.toml
@@ -33,6 +33,7 @@ common-version.workspace = true
 datafusion.workspace = true
 datafusion-common.workspace = true
 datafusion-expr.workspace = true
+datafusion-functions-aggregate-common.workspace = true
 datatypes.workspace = true
 derive_more = { version = "1", default-features = false, features = ["display"] }
 geo = { version = "0.29", optional = true }
--- a/src/common/function/src/aggrs.rs
+++ b/src/common/function/src/aggrs.rs
@@ -0,0 +1,19 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+pub mod approximate;
+pub mod count_hash;
+#[cfg(feature = "geo")]
+pub mod geo;
+pub mod vector;
--- a/src/common/function/src/aggrs/approximate.rs
+++ b/src/common/function/src/aggrs/approximate.rs
@@ -0,0 +1,32 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::function_registry::FunctionRegistry;
+
+pub mod hll;
+pub mod uddsketch;
+
+/// Registration entry point for the approximate aggregate functions in this module.
+pub(crate) struct ApproximateFunction;
+
+impl ApproximateFunction {
+    /// Registers the state and merge aggregates of `uddsketch` and `hll` in `registry`.
+    pub fn register(registry: &FunctionRegistry) {
+        // uddsketch
+        registry.register_aggr(uddsketch::UddSketchState::state_udf_impl());
+        registry.register_aggr(uddsketch::UddSketchState::merge_udf_impl());
+
+        // hll
+        registry.register_aggr(hll::HllState::state_udf_impl());
+        registry.register_aggr(hll::HllState::merge_udf_impl());
+    }
+}
--- a/src/common/function/src/aggrs/approximate/hll.rs
+++ b/src/common/function/src/aggrs/approximate/hll.rs
--- a/src/common/function/src/aggrs/approximate/uddsketch.rs
+++ b/src/common/function/src/aggrs/approximate/uddsketch.rs
--- a/src/common/function/src/aggrs/count_hash.rs
+++ b/src/common/function/src/aggrs/count_hash.rs
@@ -0,0 +1,647 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! `CountHash` / `count_hash` is a hash-based approximate distinct count function.
+//!
+//! It is a variant of `CountDistinct` that uses a hash function to approximate the
+//! distinct count.
+//! It is designed to be more efficient than `CountDistinct` for large datasets,
+//! but it is not as accurate, because distinct values may collide on the same hash.
+
+use std::collections::HashSet;
+use std::fmt::Debug;
+use std::sync::Arc;
+
+use ahash::RandomState;
+use datafusion_common::cast::as_list_array;
+use datafusion_common::error::Result;
+use datafusion_common::hash_utils::create_hashes;
+use datafusion_common::utils::SingleRowListArrayBuilder;
+use datafusion_common::{internal_err, not_impl_err, ScalarValue};
+use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
+use datafusion_expr::utils::{format_state_name, AggregateOrderSensitivity};
+use datafusion_expr::{
+    Accumulator, AggregateUDF, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF,
+    SetMonotonicity, Signature, TypeSignature, Volatility,
+};
+use datafusion_functions_aggregate_common::aggregate::groups_accumulator::nulls::filtered_null_mask;
+use datatypes::arrow;
+use datatypes::arrow::array::{
+    Array, ArrayRef, AsArray, BooleanArray, Int64Array, ListArray, UInt64Array,
+};
+use datatypes::arrow::buffer::{OffsetBuffer, ScalarBuffer};
+use datatypes::arrow::datatypes::{DataType, Field};
+
+use crate::function_registry::FunctionRegistry;
+
+type HashValueType = u64;
+
+// read from /dev/urandom 4047821dc6144e4b2abddf23ad4171126a52eeecd26eff2191cf673b965a7875
+const RANDOM_SEED_0: u64 = 0x4047821dc6144e4b;
+const RANDOM_SEED_1: u64 = 0x2abddf23ad417112;
+const RANDOM_SEED_2: u64 = 0x6a52eeecd26eff21;
+const RANDOM_SEED_3: u64 = 0x91cf673b965a7875;
+
+impl CountHash {
+    /// Registers the `count_hash` aggregate function in `registry`.
+    pub fn register(registry: &FunctionRegistry) {
+        let udf = Self::udf_impl();
+        registry.register_aggr(udf);
+    }
+
+    /// Builds the [AggregateUDF] for `count_hash`.
+    ///
+    /// The signature accepts either any number of arguments of any type or no
+    /// arguments at all, and is declared immutable.
+    pub fn udf_impl() -> AggregateUDF {
+        let accepted = vec![TypeSignature::VariadicAny, TypeSignature::Nullary];
+        let signature = Signature::one_of(accepted, Volatility::Immutable);
+        AggregateUDF::new_from_impl(Self { signature })
+    }
+}
+
+/// The `count_hash` aggregate function definition.
+#[derive(Debug, Clone)]
+pub struct CountHash {
+    /// Argument signature returned by `AggregateUDFImpl::signature`.
+    signature: Signature,
+}
+
+impl AggregateUDFImpl for CountHash {
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    /// The function is exposed under the name `count_hash`.
+    fn name(&self) -> &str {
+        "count_hash"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// The result is always `Int64`, whatever the argument types are.
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        Ok(DataType::Int64)
+    }
+
+    /// The result is declared non-nullable.
+    fn is_nullable(&self) -> bool {
+        false
+    }
+
+    /// The intermediate state is a single list column of `UInt64` items.
+    fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> {
+        Ok(vec![Field::new_list(
+            format_state_name(args.name, "count_hash"),
+            Field::new_list_field(DataType::UInt64, true),
+            // For count_hash accumulator, null list item stands for an
+            // empty value set (i.e., all NULL value so far for that group).
+            true,
+        )])
+    }
+
+    /// Creates the row-wise accumulator; more than one argument expression is
+    /// rejected as not implemented.
+    fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
+        if acc_args.exprs.len() > 1 {
+            return not_impl_err!("count_hash with multiple arguments");
+        }
+
+        Ok(Box::new(CountHashAccumulator {
+            values: HashSet::default(),
+            // Seeded with the fixed `RANDOM_SEED_*` constants rather than random seeds.
+            random_state: RandomState::with_seeds(
+                RANDOM_SEED_0,
+                RANDOM_SEED_1,
+                RANDOM_SEED_2,
+                RANDOM_SEED_3,
+            ),
+            batch_hashes: vec![],
+        }))
+    }
+
+    /// The function has no aliases.
+    fn aliases(&self) -> &[String] {
+        &[]
+    }
+
+    /// A groups accumulator is always reported as available.
+    fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {
+        true
+    }
+
+    /// Creates the grouped accumulator; more than one argument expression is
+    /// rejected as not implemented.
+    fn create_groups_accumulator(
+        &self,
+        args: AccumulatorArgs,
+    ) -> Result<Box<dyn GroupsAccumulator>> {
+        if args.exprs.len() > 1 {
+            return not_impl_err!("count_hash with multiple arguments");
+        }
+
+        Ok(Box::new(CountHashGroupAccumulator::new()))
+    }
+
+    fn reverse_expr(&self) -> ReversedUDAF {
+        ReversedUDAF::Identical
+    }
+
+    fn order_sensitivity(&self) -> AggregateOrderSensitivity {
+        AggregateOrderSensitivity::Insensitive
+    }
+
+    /// The default value is `0`.
+    fn default_value(&self, _data_type: &DataType) -> Result<ScalarValue> {
+        Ok(ScalarValue::Int64(Some(0)))
+    }
+
+    fn set_monotonicity(&self, _data_type: &DataType) -> SetMonotonicity {
+        SetMonotonicity::Increasing
+    }
+}
+
+/// GroupsAccumulator for `count_hash` aggregate function
+#[derive(Debug)]
+pub struct CountHashGroupAccumulator {
+    /// One HashSet per group to track distinct values
+    distinct_sets: Vec<HashSet<HashValueType, RandomState>>,
+    /// Hasher state handed to `create_hashes` when hashing input rows.
+    random_state: RandomState,
+    /// Scratch buffer that receives the hashes of the batch being processed.
+    batch_hashes: Vec<HashValueType>,
+}
+
+/// `Default` delegates to [`CountHashGroupAccumulator::new`].
+impl Default for CountHashGroupAccumulator {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl CountHashGroupAccumulator {
+    /// Creates an accumulator that tracks no groups yet. The hasher is seeded
+    /// from the `RANDOM_SEED_*` constants.
+    pub fn new() -> Self {
+        let random_state =
+            RandomState::with_seeds(RANDOM_SEED_0, RANDOM_SEED_1, RANDOM_SEED_2, RANDOM_SEED_3);
+        Self {
+            distinct_sets: Vec::new(),
+            random_state,
+            batch_hashes: Vec::new(),
+        }
+    }
+
+    /// Grows `distinct_sets` so that it holds at least `total_num_groups`
+    /// sets; an already large enough vector is left untouched.
+    fn ensure_sets(&mut self, total_num_groups: usize) {
+        if self.distinct_sets.len() >= total_num_groups {
+            return;
+        }
+        self.distinct_sets
+            .resize_with(total_num_groups, HashSet::default);
+    }
+}
+
+impl GroupsAccumulator for CountHashGroupAccumulator {
+    /// Hashes every row of the single input array and records each hash in
+    /// the set of the row's group.
+    ///
+    /// Rows that are NULL (per `logical_nulls`) or for which `opt_filter` is
+    /// not `Some(true)` are skipped. Panics if `values` does not hold exactly
+    /// one array.
+    fn update_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        assert_eq!(values.len(), 1, "count_hash expects a single argument");
+        self.ensure_sets(total_num_groups);
+
+        let array = &values[0];
+        // Reuse the scratch buffer across batches instead of reallocating.
+        self.batch_hashes.clear();
+        self.batch_hashes.resize(array.len(), 0);
+        let hashes = create_hashes(
+            &[ArrayRef::clone(array)],
+            &self.random_state,
+            &mut self.batch_hashes,
+        )?;
+
+        // Use a pattern similar to accumulate_indices to process rows
+        // that are not null and pass the filter
+        let nulls = array.logical_nulls();
+
+        match (nulls.as_ref(), opt_filter) {
+            (None, None) => {
+                // No nulls, no filter - process all rows
+                for (row_idx, &group_idx) in group_indices.iter().enumerate() {
+                    self.distinct_sets[group_idx].insert(hashes[row_idx]);
+                }
+            }
+            (Some(nulls), None) => {
+                // Has nulls, no filter
+                for (row_idx, (&group_idx, is_valid)) in
+                    group_indices.iter().zip(nulls.iter()).enumerate()
+                {
+                    if is_valid {
+                        self.distinct_sets[group_idx].insert(hashes[row_idx]);
+                    }
+                }
+            }
+            (None, Some(filter)) => {
+                // No nulls, has filter
+                for (row_idx, (&group_idx, filter_value)) in
+                    group_indices.iter().zip(filter.iter()).enumerate()
+                {
+                    if let Some(true) = filter_value {
+                        self.distinct_sets[group_idx].insert(hashes[row_idx]);
+                    }
+                }
+            }
+            (Some(nulls), Some(filter)) => {
+                // Has nulls and filter
+                let iter = filter
+                    .iter()
+                    .zip(group_indices.iter())
+                    .zip(nulls.iter())
+                    .enumerate();
+
+                for (row_idx, ((filter_value, &group_idx), is_valid)) in iter {
+                    if is_valid && filter_value == Some(true) {
+                        self.distinct_sets[group_idx].insert(hashes[row_idx]);
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Emits, for every group selected by `emit_to`, the number of distinct
+    /// hashes recorded for it as an `Int64Array`.
+    fn evaluate(&mut self, emit_to: EmitTo) -> Result<ArrayRef> {
+        let distinct_sets: Vec<HashSet<u64, RandomState>> =
+            emit_to.take_needed(&mut self.distinct_sets);
+
+        let counts = distinct_sets
+            .iter()
+            .map(|set| set.len() as i64)
+            .collect::<Vec<_>>();
+        Ok(Arc::new(Int64Array::from(counts)))
+    }
+
+    /// Merges partial states into the per-group sets.
+    ///
+    /// `values[0]` must be a list array of `UInt64` hashes; row `i` is merged
+    /// into group `group_indices[i]`. Null list rows (an empty value set, as
+    /// produced by `convert_to_state` for filtered or NULL inputs) and null
+    /// hashes inside a list contribute nothing. `_opt_filter` is ignored.
+    /// Panics if `values` does not hold exactly one array.
+    fn merge_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        _opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        assert_eq!(
+            values.len(),
+            1,
+            "count_hash merge expects a single state array"
+        );
+        self.ensure_sets(total_num_groups);
+
+        let list_array = as_list_array(&values[0])?;
+
+        // `iter()` yields `None` for null list rows, so their (masked) child
+        // values are never merged. `zip` stops at the shorter of the two inputs.
+        for (state, &group_idx) in list_array.iter().zip(group_indices.iter()) {
+            let Some(state) = state else {
+                continue;
+            };
+            let Some(hashes) = state.as_any().downcast_ref::<UInt64Array>() else {
+                return internal_err!("count_hash state must be a list of UInt64 hashes");
+            };
+            self.distinct_sets[group_idx].extend(hashes.iter().flatten());
+        }
+
+        Ok(())
+    }
+
+    /// Emits the sets selected by `emit_to` as one list array of `UInt64`
+    /// hashes, one list row per group.
+    fn state(&mut self, emit_to: EmitTo) -> Result<Vec<ArrayRef>> {
+        let distinct_sets: Vec<HashSet<u64, RandomState>> =
+            emit_to.take_needed(&mut self.distinct_sets);
+
+        let mut offsets = Vec::with_capacity(distinct_sets.len() + 1);
+        offsets.push(0);
+        let mut curr_len = 0i32;
+
+        // The offsets are filled in lazily, as a side effect of draining the
+        // flattened value iterator below.
+        let mut value_iter = distinct_sets
+            .into_iter()
+            .flat_map(|set| {
+                // build offset
+                curr_len += set.len() as i32;
+                offsets.push(curr_len);
+                // convert into iter
+                set.into_iter()
+            })
+            .peekable();
+        let data_array: ArrayRef = if value_iter.peek().is_none() {
+            arrow::array::new_empty_array(&DataType::UInt64) as _
+        } else {
+            Arc::new(UInt64Array::from_iter_values(value_iter))
+        };
+        let offset_buffer = OffsetBuffer::new(ScalarBuffer::from(offsets));
+
+        let list_array = ListArray::new(
+            Arc::new(Field::new_list_field(DataType::UInt64, true)),
+            offset_buffer,
+            data_array,
+            None,
+        );
+
+        Ok(vec![Arc::new(list_array) as _])
+    }
+
+    /// Converts raw input rows directly into state form: a list array in
+    /// which row `i` holds the single hash of input row `i`.
+    ///
+    /// The input is hashed with the same `random_state` as `update_batch`, so
+    /// the emitted hashes are interchangeable with those produced by `state`.
+    /// The list-level null mask comes from `filtered_null_mask(opt_filter, ..)`
+    /// (presumably null where the input is NULL or the filter rejects the
+    /// row); `merge_batch` skips such rows. Panics if `values` does not hold
+    /// exactly one array.
+    fn convert_to_state(
+        &self,
+        values: &[ArrayRef],
+        opt_filter: Option<&BooleanArray>,
+    ) -> Result<Vec<ArrayRef>> {
+        assert_eq!(values.len(), 1, "count_hash expects a single argument");
+        let input = &values[0];
+
+        // The state column is declared as a list of UInt64 hashes, so the raw
+        // input must be hashed first. `&self` rules out the shared scratch
+        // buffer, hence the local one.
+        let mut hashes: Vec<HashValueType> = vec![0; input.len()];
+        create_hashes(&[ArrayRef::clone(input)], &self.random_state, &mut hashes)?;
+        let hash_array: ArrayRef = Arc::new(UInt64Array::from(hashes));
+
+        // One list entry per input row: offsets 0, 1, ..., len.
+        let offsets = OffsetBuffer::new(ScalarBuffer::from_iter(0..input.len() as i32 + 1));
+        let nulls = filtered_null_mask(opt_filter, input);
+        let list_array = ListArray::new(
+            Arc::new(Field::new_list_field(DataType::UInt64, true)),
+            offsets,
+            hash_array,
+            nulls,
+        );
+
+        Ok(vec![Arc::new(list_array)])
+    }
+
+    /// Always answers `true`.
+    fn supports_convert_to_state(&self) -> bool {
+        true
+    }
+
+    /// Estimates the memory held by this accumulator from capacities, without
+    /// walking the stored hashes.
+    fn size(&self) -> usize {
+        // `size_of::<Self>()` already covers the inline parts of the struct,
+        // including both `Vec` headers, so only heap allocations are added.
+        let set_slots =
+            self.distinct_sets.capacity() * size_of::<HashSet<HashValueType, RandomState>>();
+        let set_contents: usize = self
+            .distinct_sets
+            .iter()
+            .map(|set| set.capacity() * size_of::<HashValueType>())
+            .sum();
+        let scratch = self.batch_hashes.capacity() * size_of::<HashValueType>();
+
+        size_of::<Self>() + set_slots + set_contents + scratch
+    }
+}
+
+/// Accumulator for `count_hash` that keeps one set of value hashes.
+#[derive(Debug)]
+struct CountHashAccumulator {
+    /// Hashes of the values seen so far; its length is the reported count.
+    values: HashSet<HashValueType, RandomState>,
+    /// Hasher state handed to `create_hashes` when hashing input rows.
+    random_state: RandomState,
+    /// Scratch buffer that receives the hashes of the batch being processed.
+    batch_hashes: Vec<HashValueType>,
+}
+
+impl CountHashAccumulator {
+    /// Size estimate for the accumulator: the in-place size of `self` plus
+    /// one hash slot per unit of capacity of the `values` set.
+    fn fixed_size(&self) -> usize {
+        let inline_bytes = size_of_val(self);
+        let hash_bytes = size_of::<HashValueType>() * self.values.capacity();
+        inline_bytes + hash_bytes
+    }
+}
+
+impl Accumulator for CountHashAccumulator {
+    /// Returns the distinct hashes seen so far as a (one element) ListArray.
+    fn state(&mut self) -> Result<Vec<ScalarValue>> {
+        let hashes = self.values.iter().copied().collect::<Vec<_>>();
+        let arr = Arc::new(UInt64Array::from(hashes)) as _;
+        let list_scalar = SingleRowListArrayBuilder::new(arr).build_list_scalar();
+        Ok(vec![list_scalar])
+    }
+
+    /// Hashes every row of the first input array and adds the hashes to the
+    /// set. An empty `values` slice or an array of type `Null` is a no-op.
+    ///
+    /// NOTE(review): every row's hash is inserted here, including rows that
+    /// are NULL, whereas `CountHashGroupAccumulator::update_batch` skips NULL
+    /// rows — confirm that this difference is intended.
+    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
+        if values.is_empty() {
+            return Ok(());
+        }
+
+        let arr = &values[0];
+        if arr.data_type() == &DataType::Null {
+            return Ok(());
+        }
+
+        // Reuse the scratch buffer across batches instead of reallocating.
+        self.batch_hashes.clear();
+        self.batch_hashes.resize(arr.len(), 0);
+        let hashes = create_hashes(
+            &[ArrayRef::clone(arr)],
+            &self.random_state,
+            &mut self.batch_hashes,
+        )?;
+        self.values.extend(hashes.iter().copied());
+        Ok(())
+    }
+
+    /// Merges multiple sets of distinct values into the current set.
+    ///
+    /// The input to this function is a `ListArray` with **multiple** rows,
+    /// where each row contains the values from a partial aggregate's phase (e.g.
+    /// the result of calling `Self::state` on multiple accumulators).
+    ///
+    /// Returns an internal error when a list row is null or when the list
+    /// items are not `UInt64`. Panics if more than one state array is passed.
+    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
+        if states.is_empty() {
+            return Ok(());
+        }
+        assert_eq!(states.len(), 1, "count_hash states must be singleton!");
+        let array = &states[0];
+        let list_array = array.as_list::<i32>();
+        for inner_array in list_array.iter() {
+            let Some(inner_array) = inner_array else {
+                return internal_err!(
+                    "Intermediate results of count_hash should always be non null"
+                );
+            };
+            let Some(hash_array) = inner_array.as_any().downcast_ref::<UInt64Array>() else {
+                return internal_err!(
+                    "Intermediate results of count_hash should be lists of UInt64"
+                );
+            };
+            // Every slot is read regardless of its validity bit, matching the
+            // previous per-index `value(i)` loop.
+            self.values.extend(hash_array.values().iter().copied());
+        }
+        Ok(())
+    }
+
+    /// Reports the number of distinct hashes collected so far as `Int64`.
+    fn evaluate(&mut self) -> Result<ScalarValue> {
+        Ok(ScalarValue::Int64(Some(self.values.len() as i64)))
+    }
+
+    /// Memory estimate; see `fixed_size`.
+    fn size(&self) -> usize {
+        self.fixed_size()
+    }
+}
+
+// Unit tests for the row-based and the grouped `count_hash` accumulators.
+#[cfg(test)]
+mod tests {
+    use datatypes::arrow::array::{Array, BooleanArray, Int32Array, Int64Array};
+
+    use super::*;
+
+    /// Builds a `CountHashAccumulator` with an empty set and the fixed seeds.
+    fn create_test_accumulator() -> CountHashAccumulator {
+        CountHashAccumulator {
+            values: HashSet::default(),
+            random_state: RandomState::with_seeds(
+                RANDOM_SEED_0,
+                RANDOM_SEED_1,
+                RANDOM_SEED_2,
+                RANDOM_SEED_3,
+            ),
+            batch_hashes: vec![],
+        }
+    }
+
+    /// Exercises `update_batch` + `evaluate` on regular, empty and all-NULL input.
+    #[test]
+    fn test_count_hash_accumulator() -> Result<()> {
+        let mut acc = create_test_accumulator();
+
+        // Test with some data
+        let array = Arc::new(Int32Array::from(vec![
+            Some(1),
+            Some(2),
+            Some(3),
+            Some(1),
+            Some(2),
+            None,
+        ])) as ArrayRef;
+        acc.update_batch(&[array])?;
+        let result = acc.evaluate()?;
+        // Expected count is 4, not 3: the NULL row is hashed like any other
+        // row and therefore adds one more entry next to {1, 2, 3}.
+        assert_eq!(result, ScalarValue::Int64(Some(4)));
+
+        // Test with empty data
+        let mut acc = create_test_accumulator();
+        let array = Arc::new(Int32Array::from(vec![] as Vec<Option<i32>>)) as ArrayRef;
+        acc.update_batch(&[array])?;
+        let result = acc.evaluate()?;
+        assert_eq!(result, ScalarValue::Int64(Some(0)));
+
+        // Test with only nulls
+        let mut acc = create_test_accumulator();
+        let array = Arc::new(Int32Array::from(vec![None, None, None])) as ArrayRef;
+        acc.update_batch(&[array])?;
+        let result = acc.evaluate()?;
+        // The three NULL rows are expected to collapse into a single entry.
+        assert_eq!(result, ScalarValue::Int64(Some(1)));
+
+        Ok(())
+    }
+
+    /// Merges the states of two accumulators into a third one.
+    #[test]
+    fn test_count_hash_accumulator_merge() -> Result<()> {
+        // Accumulator 1
+        let mut acc1 = create_test_accumulator();
+        let array1 = Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])) as ArrayRef;
+        acc1.update_batch(&[array1])?;
+        let state1 = acc1.state()?;
+
+        // Accumulator 2
+        let mut acc2 = create_test_accumulator();
+        let array2 = Arc::new(Int32Array::from(vec![Some(3), Some(4), Some(5)])) as ArrayRef;
+        acc2.update_batch(&[array2])?;
+        let state2 = acc2.state()?;
+
+        // Merge state1 and state2 into a new accumulator
+        let mut acc_merged = create_test_accumulator();
+        let state_array1 = state1[0].to_array()?;
+        let state_array2 = state2[0].to_array()?;
+
+        acc_merged.merge_batch(&[state_array1])?;
+        acc_merged.merge_batch(&[state_array2])?;
+
+        let result = acc_merged.evaluate()?;
+        // Distinct values are {1, 2, 3, 4, 5}, so count is 5
+        assert_eq!(result, ScalarValue::Int64(Some(5)));
+
+        Ok(())
+    }
+
+    /// Builds an empty `CountHashGroupAccumulator`.
+    fn create_test_group_accumulator() -> CountHashGroupAccumulator {
+        CountHashGroupAccumulator::new()
+    }
+
+    /// Grouped update without filter or NULLs.
+    #[test]
+    fn test_count_hash_group_accumulator() -> Result<()> {
+        let mut acc = create_test_group_accumulator();
+        let values = Arc::new(Int32Array::from(vec![1, 2, 1, 3, 2, 4, 5])) as ArrayRef;
+        let group_indices = vec![0, 1, 0, 0, 1, 2, 0];
+        let total_num_groups = 3;
+
+        acc.update_batch(&[values], &group_indices, None, total_num_groups)?;
+
+        let result_array = acc.evaluate(EmitTo::All)?;
+        let result = result_array.as_any().downcast_ref::<Int64Array>().unwrap();
+
+        // Group 0: {1, 3, 5} -> 3
+        // Group 1: {2} -> 1
+        // Group 2: {4} -> 1
+        assert_eq!(result.value(0), 3);
+        assert_eq!(result.value(1), 1);
+        assert_eq!(result.value(2), 1);
+
+        Ok(())
+    }
+
+    /// Grouped update where a boolean filter removes some rows.
+    #[test]
+    fn test_count_hash_group_accumulator_with_filter() -> Result<()> {
+        let mut acc = create_test_group_accumulator();
+        let values = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])) as ArrayRef;
+        let group_indices = vec![0, 0, 1, 1, 2, 2];
+        let filter = BooleanArray::from(vec![true, false, true, true, false, true]);
+        let total_num_groups = 3;
+
+        acc.update_batch(&[values], &group_indices, Some(&filter), total_num_groups)?;
+
+        let result_array = acc.evaluate(EmitTo::All)?;
+        let result = result_array.as_any().downcast_ref::<Int64Array>().unwrap();
+
+        // Group 0: {1} (2 is filtered out) -> 1
+        // Group 1: {3, 4} -> 2
+        // Group 2: {6} (5 is filtered out) -> 1
+        assert_eq!(result.value(0), 1);
+        assert_eq!(result.value(1), 2);
+        assert_eq!(result.value(2), 1);
+
+        Ok(())
+    }
+
+    /// Merges the emitted state of one grouped accumulator into another one,
+    /// remapping the group indices on the way.
+    #[test]
+    fn test_count_hash_group_accumulator_merge() -> Result<()> {
+        // Accumulator 1
+        let mut acc1 = create_test_group_accumulator();
+        let values1 = Arc::new(Int32Array::from(vec![1, 2, 3, 4])) as ArrayRef;
+        let group_indices1 = vec![0, 0, 1, 1];
+        acc1.update_batch(&[values1], &group_indices1, None, 2)?;
+        // acc1 state: group 0 -> {1, 2}, group 1 -> {3, 4}
+        let state1 = acc1.state(EmitTo::All)?;
+
+        // Accumulator 2
+        let mut acc2 = create_test_group_accumulator();
+        let values2 = Arc::new(Int32Array::from(vec![5, 6, 1, 3])) as ArrayRef;
+        // Merge into different group indices
+        let group_indices2 = vec![2, 2, 0, 1];
+        acc2.update_batch(&[values2], &group_indices2, None, 3)?;
+        // acc2 state: group 0 -> {1}, group 1 -> {3}, group 2 -> {5, 6}
+
+        // Merge state from acc1 into acc2
+        // We will merge acc1's group 0 into acc2's group 0
+        // and acc1's group 1 into acc2's group 2
+        let merge_group_indices = vec![0, 2];
+        acc2.merge_batch(&state1, &merge_group_indices, None, 3)?;
+
+        let result_array = acc2.evaluate(EmitTo::All)?;
+        let result = result_array.as_any().downcast_ref::<Int64Array>().unwrap();
+
+        // Final state of acc2:
+        // Group 0: {1} U {1, 2} -> {1, 2}, count = 2
+        // Group 1: {3}, count = 1
+        // Group 2: {5, 6} U {3, 4} -> {3, 4, 5, 6}, count = 4
+        assert_eq!(result.value(0), 2);
+        assert_eq!(result.value(1), 1);
+        assert_eq!(result.value(2), 4);
+
+        Ok(())
+    }
+
+    /// Smoke test for the size estimate of an empty grouped accumulator.
+    #[test]
+    fn test_size() {
+        let acc = create_test_group_accumulator();
+        // Just test it doesn't crash and returns a value.
+        assert!(acc.size() > 0);
+    }
+}
--- a/Show More
+++ b/Show More