chore: bump version to 0.9.5 (#4853 )

fix: flush metric metadata region (#4852 )
* fix: flush metric metadata region * chore: apply suggestions from CR
2026-01-07 22:02:56 +00:00 · 2024-10-18 08:07:13 +00:00 · 2024-10-18 07:21:35 +00:00 · 2024-10-18 06:31:02 +00:00 · 2024-10-18 06:26:41 +00:00 · 2024-10-18 04:10:22 +00:00
398 changed files with 20746 additions and 8238 deletions
--- a/.github/actions/build-dev-builder-images/action.yml
+++ b/.github/actions/build-dev-builder-images/action.yml
@@ -50,7 +50,7 @@ runs:
          BUILDX_MULTI_PLATFORM_BUILD=all \
          IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
          IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
-          IMAGE_TAG=${{ inputs.version }}
+          DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}

    - name: Build and push dev-builder-centos image
      shell: bash
@@ -61,7 +61,7 @@ runs:
          BUILDX_MULTI_PLATFORM_BUILD=amd64 \
          IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
          IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
-          IMAGE_TAG=${{ inputs.version }}
+          DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}

    - name: Build and push dev-builder-android image # Only build image for amd64 platform.
      shell: bash
@@ -71,6 +71,6 @@ runs:
          BASE_IMAGE=android \
          IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
          IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
-          IMAGE_TAG=${{ inputs.version }} && \
+          DEV_BUILDER_IMAGE_TAG=${{ inputs.version }} && \

        docker push ${{ inputs.dockerhub-image-registry }}/${{ inputs.dockerhub-image-namespace }}/dev-builder-android:${{ inputs.version }}
--- a/.github/workflows/develop.yml
+++ b/.github/workflows/develop.yml
@@ -269,6 +269,13 @@ jobs:
      - name: Install cargo-gc-bin
        shell: bash
        run: cargo install cargo-gc-bin
+      - name: Check aws-lc-sys will not build
+        shell: bash
+        run: |
+             if cargo tree -i aws-lc-sys -e features | grep -q aws-lc-sys; then
+               echo "Found aws-lc-sys, which has compilation problems on older gcc versions. Please replace it with ring until its building experience improves."
+               exit 1
+             fi
      - name: Build greptime bianry
        shell: bash
        # `cargo gc` will invoke `cargo build` with specified args
@@ -435,6 +442,13 @@ jobs:
            minio: true
            kafka: true
            values: "with-remote-wal.yaml"
+        include:
+          - target: "fuzz_migrate_mito_regions"
+            mode:
+              name: "Local WAL"
+              minio: true
+              kafka: false
+              values: "with-minio.yaml"
    steps:
      - name: Remove unused software
        run: |
@@ -523,7 +537,7 @@ jobs:
        with:
          image-registry: localhost:5001
          values-filename: ${{ matrix.mode.values }}
-          enable-region-failover: true
+          enable-region-failover: ${{ matrix.mode.kafka }}
      - name: Port forward (mysql)
        run: |
          kubectl port-forward service/my-greptimedb-frontend 4002:4002 -n my-greptimedb&
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,6 +20,7 @@ members = [
    "src/common/mem-prof",
    "src/common/meta",
    "src/common/plugins",
+    "src/common/pprof",
    "src/common/procedure",
    "src/common/procedure-test",
    "src/common/query",
@@ -64,7 +65,7 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "0.9.3"
+version = "0.9.5"
 edition = "2021"
 license = "Apache-2.0"

@@ -120,11 +121,11 @@ etcd-client = { version = "0.13" }
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "973f49cde88a582fb65755cc572ebcf6fb93ccf7" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "b4d301184eb0d01fd4d1042fcc7c5dfb54f3c1e3" }
 humantime = "2.1"
 humantime-serde = "1.1"
 itertools = "0.10"
-jsonb = { git = "https://github.com/CookiePieWw/jsonb.git", rev = "d0166c130fce903bf6c58643417a3173a6172d31", default-features = false }
+jsonb = { git = "https://github.com/datafuselabs/jsonb.git", rev = "46ad50fc71cf75afbf98eec455f7892a6387c1fc", default-features = false }
 lazy_static = "1.4"
 meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd" }
 mockall = "0.11.4"
@@ -137,12 +138,13 @@ opentelemetry-proto = { version = "0.5", features = [
    "metrics",
    "trace",
    "with-serde",
+    "logs",
 ] }
 parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
 paste = "1.0"
 pin-project = "1.0"
 prometheus = { version = "0.13.3", features = ["process"] }
-promql-parser = { version = "0.4" }
+promql-parser = { version = "0.4.1" }
 prost = "0.12"
 raft-engine = { version = "0.4.1", default-features = false }
 rand = "0.8"
@@ -208,6 +210,7 @@ common-macro = { path = "src/common/macro" }
 common-mem-prof = { path = "src/common/mem-prof" }
 common-meta = { path = "src/common/meta" }
 common-plugins = { path = "src/common/plugins" }
+common-pprof = { path = "src/common/pprof" }
 common-procedure = { path = "src/common/procedure" }
 common-procedure-test = { path = "src/common/procedure-test" }
 common-query = { path = "src/common/query" }
@@ -245,6 +248,15 @@ store-api = { path = "src/store-api" }
 substrait = { path = "src/common/substrait" }
 table = { path = "src/table" }

+[patch.crates-io]
+# change all rustls dependencies to use our fork to default to `ring` to make it "just work"
+hyper-rustls = { git = "https://github.com/GreptimeTeam/hyper-rustls" }
+rustls = { git = "https://github.com/GreptimeTeam/rustls" }
+tokio-rustls = { git = "https://github.com/GreptimeTeam/tokio-rustls" }
+# This is commented out because we are not using aws-lc-sys. If we ever need it, uncomment this line or use a release that includes this commit; otherwise it won't compile with gcc < 8.1.
+# see https://github.com/aws/aws-lc-rs/pull/526
+# aws-lc-sys = { git ="https://github.com/aws/aws-lc-rs", rev = "556558441e3494af4b156ae95ebc07ebc2fd38aa" }
+
 [workspace.dependencies.meter-macros]
 git = "https://github.com/GreptimeTeam/greptime-meter.git"
 rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd"
--- a/2
+++ b/2
@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
 IMAGE_REGISTRY ?= docker.io
 IMAGE_NAMESPACE ?= greptime
 IMAGE_TAG ?= latest
-DEV_BUILDER_IMAGE_TAG ?= 2024-06-06-b4b105ad-20240827021230
+DEV_BUILDER_IMAGE_TAG ?= 2024-06-06-5674c14f-20240920110415
 BUILDX_MULTI_PLATFORM_BUILD ?= false
 BUILDX_BUILDER_NAME ?= gtbuilder
 BASE_IMAGE ?= ubuntu
--- a/config/config.md
+++ b/config/config.md
@@ -17,6 +17,7 @@
 | `default_timezone` | String | Unset | The default timezone of the server. |
 | `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
 | `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
+| `max_concurrent_queries` | Integer | `0` | The maximum number of concurrent queries allowed to be executed. Zero means unlimited. |
 | `runtime` | -- | -- | The runtime options. |
 | `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
 | `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
@@ -82,6 +83,7 @@
 | `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system <br/>can still successfully replay memtable data without throwing an <br/>out-of-range error. <br/>However, enabling this option might lead to unexpected data loss, <br/>as the system will skip over missing entries instead of treating <br/>them as critical errors. |
 | `metadata_store` | -- | -- | Metadata storage options. |
 | `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. |
 | `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. |
@@ -160,8 +162,13 @@
 | `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
+| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
+| `logging.slow_query` | -- | -- | The slow query log options. |
+| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
+| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
+| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
 | `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
 | `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
 | `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
@@ -246,8 +253,13 @@
 | `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
+| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
+| `logging.slow_query` | -- | -- | The slow query log options. |
+| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
+| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
+| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
 | `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
 | `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
 | `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
@@ -311,8 +323,13 @@
 | `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
+| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
+| `logging.slow_query` | -- | -- | The slow query log options. |
+| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
+| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
+| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
 | `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
 | `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
 | `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
@@ -335,6 +352,7 @@
 | `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
 | `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
 | `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
+| `max_concurrent_queries` | Integer | `0` | The maximum number of concurrent queries allowed to be executed. Zero means unlimited. |
 | `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. |
 | `rpc_hostname` | String | Unset | Deprecated, use `grpc.hostname` instead. |
 | `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. |
@@ -392,6 +410,7 @@
 | `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system <br/>can still successfully replay memtable data without throwing an <br/>out-of-range error. <br/>However, enabling this option might lead to unexpected data loss, <br/>as the system will skip over missing entries instead of treating <br/>them as critical errors. |
 | `storage` | -- | -- | The data storage options. |
 | `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
 | `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
@@ -462,8 +481,13 @@
 | `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
+| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
+| `logging.slow_query` | -- | -- | The slow query log options. |
+| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
+| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
+| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
 | `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
 | `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
 | `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
@@ -508,7 +532,12 @@
 | `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
+| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
 | `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
+| `logging.slow_query` | -- | -- | The slow query log options. |
+| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
+| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
+| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
 | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
 | `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -19,6 +19,9 @@ enable_telemetry = true
 ## Parallelism of initializing regions.
 init_regions_parallelism = 16

+## The maximum number of concurrent queries allowed to be executed. Zero means unlimited.
+max_concurrent_queries = 0
+
 ## Deprecated, use `grpc.addr` instead.
 ## @toml2docs:none-default
 rpc_addr = "127.0.0.1:3001"
@@ -210,6 +213,17 @@ create_index = true
 ## **It's only used when the provider is `kafka`**.
 dump_index_interval = "60s"

+## Ignore missing entries during read WAL.
+## **It's only used when the provider is `kafka`**.
+## 
+## This option ensures that when Kafka messages are deleted, the system 
+## can still successfully replay memtable data without throwing an 
+## out-of-range error. 
+## However, enabling this option might lead to unexpected data loss, 
+## as the system will skip over missing entries instead of treating 
+## them as critical errors.
+overwrite_entry_start_id = false
+
 # The Kafka SASL configuration.
 # **It's only used when the provider is `kafka`**.
 # Available SASL mechanisms:
@@ -577,12 +591,28 @@ append_stdout = true
 ## The log format. Can be `text`/`json`.
 log_format = "text"

+## The maximum amount of log files.
+max_log_files = 720
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
 [logging.tracing_sample_ratio]
 default_ratio = 1.0

+## The slow query log options.
+[logging.slow_query]
+## Whether to enable slow query log.
+enable = false
+
+## The threshold of slow query.
+## @toml2docs:none-default
+threshold = "10s"
+
+## The sampling ratio of slow query log. The value should be in the range of (0, 1].
+## @toml2docs:none-default
+sample_ratio = 1.0
+
 ## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
 ## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
 [export_metrics]
--- a/config/flownode.example.toml
+++ b/config/flownode.example.toml
@@ -78,12 +78,28 @@ append_stdout = true
 ## The log format. Can be `text`/`json`.
 log_format = "text"

+## The maximum amount of log files.
+max_log_files = 720
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
 [logging.tracing_sample_ratio]
 default_ratio = 1.0

+## The slow query log options.
+[logging.slow_query]
+## Whether to enable slow query log.
+enable = false
+
+## The threshold of slow query.
+## @toml2docs:none-default
+threshold = "10s"
+
+## The sampling ratio of slow query log. The value should be in the range of (0, 1].
+## @toml2docs:none-default
+sample_ratio = 1.0
+
 ## The tracing options. Only effect when compiled with `tokio-console` feature.
 [tracing]
 ## The tokio console address.
--- a/config/frontend.example.toml
+++ b/config/frontend.example.toml
@@ -185,12 +185,28 @@ append_stdout = true
 ## The log format. Can be `text`/`json`.
 log_format = "text"

+## The maximum amount of log files.
+max_log_files = 720
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
 [logging.tracing_sample_ratio]
 default_ratio = 1.0

+## The slow query log options.
+[logging.slow_query]
+## Whether to enable slow query log.
+enable = false
+
+## The threshold of slow query.
+## @toml2docs:none-default
+threshold = "10s"
+
+## The sampling ratio of slow query log. The value should be in the range of (0, 1].
+## @toml2docs:none-default
+sample_ratio = 1.0
+
 ## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
 ## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
 [export_metrics]
--- a/config/metasrv.example.toml
+++ b/config/metasrv.example.toml
@@ -172,12 +172,28 @@ append_stdout = true
 ## The log format. Can be `text`/`json`.
 log_format = "text"

+## The maximum amount of log files.
+max_log_files = 720
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
 [logging.tracing_sample_ratio]
 default_ratio = 1.0

+## The slow query log options.
+[logging.slow_query]
+## Whether to enable slow query log.
+enable = false
+
+## The threshold of slow query.
+## @toml2docs:none-default
+threshold = "10s"
+
+## The sampling ratio of slow query log. The value should be in the range of (0, 1].
+## @toml2docs:none-default
+sample_ratio = 1.0
+
 ## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
 ## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
 [export_metrics]
--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -15,6 +15,9 @@ init_regions_in_background = false
 ## Parallelism of initializing regions.
 init_regions_parallelism = 16

+## The maximum number of concurrent queries allowed to be executed. Zero means unlimited.
+max_concurrent_queries = 0
+
 ## The runtime options.
 #+ [runtime]
 ## The number of threads to execute the runtime for global read operations.
@@ -234,6 +237,17 @@ backoff_base = 2
 ## **It's only used when the provider is `kafka`**.
 backoff_deadline = "5mins"

+## Ignore missing entries during read WAL.
+## **It's only used when the provider is `kafka`**.
+## 
+## This option ensures that when Kafka messages are deleted, the system 
+## can still successfully replay memtable data without throwing an 
+## out-of-range error. 
+## However, enabling this option might lead to unexpected data loss, 
+## as the system will skip over missing entries instead of treating 
+## them as critical errors.
+overwrite_entry_start_id = false
+
 # The Kafka SASL configuration.
 # **It's only used when the provider is `kafka`**.
 # Available SASL mechanisms:
@@ -621,12 +635,28 @@ append_stdout = true
 ## The log format. Can be `text`/`json`.
 log_format = "text"

+## The maximum amount of log files.
+max_log_files = 720
+
 ## The percentage of tracing will be sampled and exported.
 ## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
 ## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
 [logging.tracing_sample_ratio]
 default_ratio = 1.0

+## The slow query log options.
+[logging.slow_query]
+## Whether to enable slow query log.
+enable = false
+
+## The threshold of slow query.
+## @toml2docs:none-default
+threshold = "10s"
+
+## The sampling ratio of slow query log. The value should be in the range of (0, 1].
+## @toml2docs:none-default
+sample_ratio = 1.0
+
 ## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
 ## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
 [export_metrics]
--- a/docker/dev-builder/binstall/pull_binstall.sh
+++ b/docker/dev-builder/binstall/pull_binstall.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+set -euxo pipefail
+
+cd "$(mktemp -d)"
+# Pin the version to v1.6.6; this differs from the latest version used by the original install script at
+# https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh
+base_url="https://github.com/cargo-bins/cargo-binstall/releases/download/v1.6.6/cargo-binstall-"
+
+os="$(uname -s)"
+if [ "$os" == "Darwin" ]; then
+    url="${base_url}universal-apple-darwin.zip"
+    curl -LO --proto '=https' --tlsv1.2 -sSf "$url"
+    unzip cargo-binstall-universal-apple-darwin.zip
+elif [ "$os" == "Linux" ]; then
+    machine="$(uname -m)"
+    if [ "$machine" == "armv7l" ]; then
+        machine="armv7"
+    fi
+    target="${machine}-unknown-linux-musl"
+    if [ "$machine" == "armv7" ]; then
+        target="${target}eabihf"
+    fi
+
+    url="${base_url}${target}.tgz"
+    curl -L --proto '=https' --tlsv1.2 -sSf "$url" | tar -xvzf -
+elif [ "${OS-}" = "Windows_NT" ]; then
+    machine="$(uname -m)"
+    target="${machine}-pc-windows-msvc"
+    url="${base_url}${target}.zip"
+    curl -LO --proto '=https' --tlsv1.2 -sSf "$url"
+    unzip "cargo-binstall-${target}.zip"
+else
+    echo "Unsupported OS ${os}"
+    exit 1
+fi
+
+./cargo-binstall -y --force cargo-binstall
+
+CARGO_HOME="${CARGO_HOME:-$HOME/.cargo}"
+
+if ! [[ ":$PATH:" == *":$CARGO_HOME/bin:"* ]]; then
+    if [ -n "${CI:-}" ] && [ -n "${GITHUB_PATH:-}" ]; then
+        echo "$CARGO_HOME/bin" >> "$GITHUB_PATH"
+    else
+        echo
+        printf "\033[0;31mYour path is missing %s, you might want to add it.\033[0m\n" "$CARGO_HOME/bin"
+        echo
+    fi
+fi
--- a/docker/dev-builder/centos/Dockerfile
+++ b/docker/dev-builder/centos/Dockerfile
@@ -32,7 +32,9 @@ RUN rustup toolchain install ${RUST_TOOLCHAIN}

 # Install cargo-binstall with a specific version to adapt the current rust toolchain.
 # Note: if we use the latest version, we may encounter the following `use of unstable library feature 'io_error_downcast'` error.
-RUN cargo install cargo-binstall --version 1.6.6 --locked
+# Compiling from source takes too long, so we use the precompiled binary instead.
+COPY $DOCKER_BUILD_ROOT/docker/dev-builder/binstall/pull_binstall.sh /usr/local/bin/pull_binstall.sh
+RUN chmod +x /usr/local/bin/pull_binstall.sh && /usr/local/bin/pull_binstall.sh

 # Install nextest.
 RUN cargo binstall cargo-nextest --no-confirm
--- a/docker/dev-builder/ubuntu/Dockerfile
+++ b/docker/dev-builder/ubuntu/Dockerfile
@@ -24,6 +24,15 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    python3.10 \
    python3.10-dev

+# https://github.com/GreptimeTeam/greptimedb/actions/runs/10935485852/job/30357457188#step:3:7106
+# `aws-lc-sys` requires gcc >= 10.3.0 to work, hence we alias the compilers to gcc-10
+RUN apt-get remove -y gcc-9 g++-9 cpp-9 && \
+    apt-get install -y gcc-10 g++-10 cpp-10 make cmake && \
+    ln -sf /usr/bin/gcc-10 /usr/bin/gcc && ln -sf /usr/bin/g++-10 /usr/bin/g++ && \
+    ln -sf /usr/bin/gcc-10 /usr/bin/cc && \
+    ln -sf /usr/bin/g++-10 /usr/bin/cpp && ln -sf /usr/bin/g++-10 /usr/bin/c++ && \
+    cc --version && gcc --version && g++ --version && cpp --version && c++ --version
+
 # Remove Python 3.8 and install pip.
 RUN apt-get -y purge python3.8 && \
    apt-get -y autoremove && \
@@ -57,7 +66,9 @@ RUN rustup toolchain install ${RUST_TOOLCHAIN}

 # Install cargo-binstall with a specific version to adapt the current rust toolchain.
 # Note: if we use the latest version, we may encounter the following `use of unstable library feature 'io_error_downcast'` error.
-RUN cargo install cargo-binstall --version 1.6.6 --locked
+# Compiling from source takes too long, so we use the precompiled binary instead.
+COPY $DOCKER_BUILD_ROOT/docker/dev-builder/binstall/pull_binstall.sh /usr/local/bin/pull_binstall.sh
+RUN chmod +x /usr/local/bin/pull_binstall.sh && /usr/local/bin/pull_binstall.sh

 # Install nextest.
 RUN cargo binstall cargo-nextest --no-confirm
--- a/docs/benchmarks/log/README.md
+++ b/docs/benchmarks/log/README.md
@@ -48,4 +48,4 @@ Please refer to [SQL query](./query.sql) for GreptimeDB and Clickhouse, and [que

 ## Addition
 - You can tune GreptimeDB's configuration to get better performance.
- You can setup GreptimeDB to use S3 as storage, see [here](https://docs.greptime.com/user-guide/operations/configuration/#storage-options).
+- You can set up GreptimeDB to use S3 as storage, see [here](https://docs.greptime.com/user-guide/deployments/configuration#storage-options).
--- a/src/servers/src/http/pprof/README.md
+++ b/src/servers/src/http/pprof/README.md
@@ -1,15 +1,9 @@
 # Profiling CPU

-## Build GreptimeDB with `pprof` feature
-
-```bash
-cargo build --features=pprof
-```
-
 ## HTTP API
 Sample at 99 Hertz, for 5 seconds, output report in [protobuf format](https://github.com/google/pprof/blob/master/proto/profile.proto).
 ```bash
-curl -s '0:4000/v1/prof/cpu' > /tmp/pprof.out
+curl -s '0:4000/debug/prof/cpu' > /tmp/pprof.out
 ```

 Then you can use `pprof` command with the protobuf file.
@@ -19,10 +13,10 @@ go tool pprof -top /tmp/pprof.out

 Sample at 99 Hertz, for 60 seconds, output report in flamegraph format.
 ```bash
-curl -s '0:4000/v1/prof/cpu?seconds=60&output=flamegraph' > /tmp/pprof.svg
+curl -s '0:4000/debug/prof/cpu?seconds=60&output=flamegraph' > /tmp/pprof.svg
 ```

 Sample at 49 Hertz, for 10 seconds, output report in text format.
 ```bash
-curl -s '0:4000/v1/prof/cpu?seconds=10&frequency=49&output=text' > /tmp/pprof.txt
+curl -s '0:4000/debug/prof/cpu?seconds=10&frequency=49&output=text' > /tmp/pprof.txt
 ```
--- a/docs/how-to/how-to-profile-memory.md
+++ b/docs/how-to/how-to-profile-memory.md
@@ -12,16 +12,10 @@ brew install jemalloc
 sudo apt install libjemalloc-dev
 ```

-### [flamegraph](https://github.com/brendangregg/FlameGraph) 
+### [flamegraph](https://github.com/brendangregg/FlameGraph)

 ```bash
-curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl 
-```
-
-### Build GreptimeDB with `mem-prof` feature.
-
-```bash
-cargo build --features=mem-prof
+curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
 ```

 ## Profiling
@@ -35,7 +29,7 @@ MALLOC_CONF=prof:true,lg_prof_interval:28 ./target/debug/greptime standalone sta
 Dump memory profiling data through HTTP API:

 ```bash
-curl localhost:4000/v1/prof/mem > greptime.hprof
+curl localhost:4000/debug/prof/mem > greptime.hprof
 ```

 You can periodically dump profiling data and compare them to find the delta memory usage.
@@ -45,6 +39,9 @@ You can periodically dump profiling data and compare them to find the delta memo
 To create flamegraph according to dumped profiling data:

 ```bash
-jeprof --svg <path_to_greptimedb_binary> --base=<baseline_prof> <profile_data> > output.svg
-```
+sudo apt install -y libjemalloc-dev

+jeprof <path_to_greptime_binary> <profile_data> --collapse | ./flamegraph.pl > mem-prof.svg
+
+jeprof <path_to_greptime_binary> --base <baseline_prof> <profile_data> --collapse | ./flamegraph.pl > output.svg
+```
--- a/docs/logo-text-padding-dark.png
+++ b/docs/logo-text-padding-dark.png
--- a/docs/logo-text-padding.png
+++ b/docs/logo-text-padding.png
--- a/src/api/src/helper.rs
+++ b/src/api/src/helper.rs
@@ -17,10 +17,11 @@ use std::sync::Arc;
 use common_base::BitVec;
 use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION};
 use common_decimal::Decimal128;
-use common_time::interval::IntervalUnit;
 use common_time::time::Time;
 use common_time::timestamp::TimeUnit;
-use common_time::{Date, DateTime, Interval, Timestamp};
+use common_time::{
+    Date, DateTime, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp,
+};
 use datatypes::prelude::{ConcreteDataType, ValueRef};
 use datatypes::scalars::ScalarVector;
 use datatypes::types::{
@@ -456,13 +457,11 @@ pub fn push_vals(column: &mut Column, origin_count: usize, vector: VectorRef) {
            TimeUnit::Microsecond => values.time_microsecond_values.push(val.value()),
            TimeUnit::Nanosecond => values.time_nanosecond_values.push(val.value()),
        },
-        Value::Interval(val) => match val.unit() {
-            IntervalUnit::YearMonth => values.interval_year_month_values.push(val.to_i32()),
-            IntervalUnit::DayTime => values.interval_day_time_values.push(val.to_i64()),
-            IntervalUnit::MonthDayNano => values
-                .interval_month_day_nano_values
-                .push(convert_i128_to_interval(val.to_i128())),
-        },
+        Value::IntervalYearMonth(val) => values.interval_year_month_values.push(val.to_i32()),
+        Value::IntervalDayTime(val) => values.interval_day_time_values.push(val.to_i64()),
+        Value::IntervalMonthDayNano(val) => values
+            .interval_month_day_nano_values
+            .push(convert_month_day_nano_to_pb(val)),
        Value::Decimal128(val) => values.decimal128_values.push(convert_to_pb_decimal128(val)),
        Value::List(_) | Value::Duration(_) => unreachable!(),
    });
@@ -507,14 +506,12 @@ fn ddl_request_type(request: &DdlRequest) -> &'static str {
    }
 }

-/// Converts an i128 value to google protobuf type [IntervalMonthDayNano].
-pub fn convert_i128_to_interval(v: i128) -> v1::IntervalMonthDayNano {
-    let interval = Interval::from_i128(v);
-    let (months, days, nanoseconds) = interval.to_month_day_nano();
+/// Converts an interval to google protobuf type [IntervalMonthDayNano].
+pub fn convert_month_day_nano_to_pb(v: IntervalMonthDayNano) -> v1::IntervalMonthDayNano {
    v1::IntervalMonthDayNano {
-        months,
-        days,
-        nanoseconds,
+        months: v.months,
+        days: v.days,
+        nanoseconds: v.nanoseconds,
    }
 }

@@ -562,11 +559,15 @@ pub fn pb_value_to_value_ref<'a>(
        ValueData::TimeMillisecondValue(t) => ValueRef::Time(Time::new_millisecond(*t)),
        ValueData::TimeMicrosecondValue(t) => ValueRef::Time(Time::new_microsecond(*t)),
        ValueData::TimeNanosecondValue(t) => ValueRef::Time(Time::new_nanosecond(*t)),
-        ValueData::IntervalYearMonthValue(v) => ValueRef::Interval(Interval::from_i32(*v)),
-        ValueData::IntervalDayTimeValue(v) => ValueRef::Interval(Interval::from_i64(*v)),
+        ValueData::IntervalYearMonthValue(v) => {
+            ValueRef::IntervalYearMonth(IntervalYearMonth::from_i32(*v))
+        }
+        ValueData::IntervalDayTimeValue(v) => {
+            ValueRef::IntervalDayTime(IntervalDayTime::from_i64(*v))
+        }
        ValueData::IntervalMonthDayNanoValue(v) => {
-            let interval = Interval::from_month_day_nano(v.months, v.days, v.nanoseconds);
-            ValueRef::Interval(interval)
+            let interval = IntervalMonthDayNano::new(v.months, v.days, v.nanoseconds);
+            ValueRef::IntervalMonthDayNano(interval)
        }
        ValueData::Decimal128Value(v) => {
            // get precision and scale from datatype_extension
@@ -657,7 +658,7 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
            IntervalType::MonthDayNano(_) => {
                Arc::new(IntervalMonthDayNanoVector::from_iter_values(
                    values.interval_month_day_nano_values.iter().map(|x| {
-                        Interval::from_month_day_nano(x.months, x.days, x.nanoseconds).to_i128()
+                        IntervalMonthDayNano::new(x.months, x.days, x.nanoseconds).to_i128()
                    }),
                ))
            }
@@ -802,18 +803,18 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
        ConcreteDataType::Interval(IntervalType::YearMonth(_)) => values
            .interval_year_month_values
            .into_iter()
-            .map(|v| Value::Interval(Interval::from_i32(v)))
+            .map(|v| Value::IntervalYearMonth(IntervalYearMonth::from_i32(v)))
            .collect(),
        ConcreteDataType::Interval(IntervalType::DayTime(_)) => values
            .interval_day_time_values
            .into_iter()
-            .map(|v| Value::Interval(Interval::from_i64(v)))
+            .map(|v| Value::IntervalDayTime(IntervalDayTime::from_i64(v)))
            .collect(),
        ConcreteDataType::Interval(IntervalType::MonthDayNano(_)) => values
            .interval_month_day_nano_values
            .into_iter()
            .map(|v| {
-                Value::Interval(Interval::from_month_day_nano(
+                Value::IntervalMonthDayNano(IntervalMonthDayNano::new(
                    v.months,
                    v.days,
                    v.nanoseconds,
@@ -941,18 +942,16 @@ pub fn to_proto_value(value: Value) -> Option<v1::Value> {
                value_data: Some(ValueData::TimeNanosecondValue(v.value())),
            },
        },
-        Value::Interval(v) => match v.unit() {
-            IntervalUnit::YearMonth => v1::Value {
-                value_data: Some(ValueData::IntervalYearMonthValue(v.to_i32())),
-            },
-            IntervalUnit::DayTime => v1::Value {
-                value_data: Some(ValueData::IntervalDayTimeValue(v.to_i64())),
-            },
-            IntervalUnit::MonthDayNano => v1::Value {
-                value_data: Some(ValueData::IntervalMonthDayNanoValue(
-                    convert_i128_to_interval(v.to_i128()),
-                )),
-            },
+        Value::IntervalYearMonth(v) => v1::Value {
+            value_data: Some(ValueData::IntervalYearMonthValue(v.to_i32())),
+        },
+        Value::IntervalDayTime(v) => v1::Value {
+            value_data: Some(ValueData::IntervalDayTimeValue(v.to_i64())),
+        },
+        Value::IntervalMonthDayNano(v) => v1::Value {
+            value_data: Some(ValueData::IntervalMonthDayNanoValue(
+                convert_month_day_nano_to_pb(v),
+            )),
        },
        Value::Decimal128(v) => v1::Value {
            value_data: Some(ValueData::Decimal128Value(convert_to_pb_decimal128(v))),
@@ -1044,13 +1043,11 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
                TimeUnit::Microsecond => ValueData::TimeMicrosecondValue(v.value()),
                TimeUnit::Nanosecond => ValueData::TimeNanosecondValue(v.value()),
            }),
-            Value::Interval(v) => Some(match v.unit() {
-                IntervalUnit::YearMonth => ValueData::IntervalYearMonthValue(v.to_i32()),
-                IntervalUnit::DayTime => ValueData::IntervalDayTimeValue(v.to_i64()),
-                IntervalUnit::MonthDayNano => {
-                    ValueData::IntervalMonthDayNanoValue(convert_i128_to_interval(v.to_i128()))
-                }
-            }),
+            Value::IntervalYearMonth(v) => Some(ValueData::IntervalYearMonthValue(v.to_i32())),
+            Value::IntervalDayTime(v) => Some(ValueData::IntervalDayTimeValue(v.to_i64())),
+            Value::IntervalMonthDayNano(v) => Some(ValueData::IntervalMonthDayNanoValue(
+                convert_month_day_nano_to_pb(v),
+            )),
            Value::Decimal128(v) => Some(ValueData::Decimal128Value(convert_to_pb_decimal128(v))),
            Value::List(_) | Value::Duration(_) => unreachable!(),
        },
@@ -1061,6 +1058,7 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
 mod tests {
    use std::sync::Arc;

+    use common_time::interval::IntervalUnit;
    use datatypes::types::{
        Int32Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType,
        TimeMillisecondType, TimeSecondType, TimestampMillisecondType, TimestampSecondType,
@@ -1506,11 +1504,11 @@ mod tests {

    #[test]
    fn test_convert_i128_to_interval() {
-        let i128_val = 3000;
-        let interval = convert_i128_to_interval(i128_val);
+        let i128_val = 3;
+        let interval = convert_month_day_nano_to_pb(IntervalMonthDayNano::from_i128(i128_val));
        assert_eq!(interval.months, 0);
        assert_eq!(interval.days, 0);
-        assert_eq!(interval.nanoseconds, 3000);
+        assert_eq!(interval.nanoseconds, 3);
    }

    #[test]
@@ -1590,9 +1588,9 @@ mod tests {
            },
        );
        let expect = vec![
-            Value::Interval(Interval::from_year_month(1_i32)),
-            Value::Interval(Interval::from_year_month(2_i32)),
-            Value::Interval(Interval::from_year_month(3_i32)),
+            Value::IntervalYearMonth(IntervalYearMonth::new(1_i32)),
+            Value::IntervalYearMonth(IntervalYearMonth::new(2_i32)),
+            Value::IntervalYearMonth(IntervalYearMonth::new(3_i32)),
        ];
        assert_eq!(expect, actual);

@@ -1605,9 +1603,9 @@ mod tests {
            },
        );
        let expect = vec![
-            Value::Interval(Interval::from_i64(1_i64)),
-            Value::Interval(Interval::from_i64(2_i64)),
-            Value::Interval(Interval::from_i64(3_i64)),
+            Value::IntervalDayTime(IntervalDayTime::from_i64(1_i64)),
+            Value::IntervalDayTime(IntervalDayTime::from_i64(2_i64)),
+            Value::IntervalDayTime(IntervalDayTime::from_i64(3_i64)),
        ];
        assert_eq!(expect, actual);

@@ -1636,9 +1634,9 @@ mod tests {
            },
        );
        let expect = vec![
-            Value::Interval(Interval::from_month_day_nano(1, 2, 3)),
-            Value::Interval(Interval::from_month_day_nano(5, 6, 7)),
-            Value::Interval(Interval::from_month_day_nano(9, 10, 11)),
+            Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 2, 3)),
+            Value::IntervalMonthDayNano(IntervalMonthDayNano::new(5, 6, 7)),
+            Value::IntervalMonthDayNano(IntervalMonthDayNano::new(9, 10, 11)),
        ];
        assert_eq!(expect, actual);
    }
--- a/src/auth/src/common.rs
+++ b/src/auth/src/common.rs
@@ -75,6 +75,16 @@ pub enum Password<'a> {
    PgMD5(HashedPassword<'a>, Salt<'a>),
 }

+impl Password<'_> {
+    pub fn r#type(&self) -> &str {
+        match self {
+            Password::PlainText(_) => "plain_text",
+            Password::MysqlNativePassword(_, _) => "mysql_native_password",
+            Password::PgMD5(_, _) => "pg_md5",
+        }
+    }
+}
+
 pub fn auth_mysql(
    auth_data: HashedPassword,
    salt: Salt,
--- a/src/auth/src/error.rs
+++ b/src/auth/src/error.rs
@@ -89,7 +89,7 @@ impl ErrorExt for Error {
            Error::FileWatch { .. } => StatusCode::InvalidArguments,
            Error::InternalState { .. } => StatusCode::Unexpected,
            Error::Io { .. } => StatusCode::StorageUnavailable,
-            Error::AuthBackend { .. } => StatusCode::Internal,
+            Error::AuthBackend { source, .. } => source.status_code(),

            Error::UserNotFound { .. } => StatusCode::UserNotFound,
            Error::UnsupportedPasswordType { .. } => StatusCode::UnsupportedPasswordType,
--- a/src/auth/src/user_provider.rs
+++ b/src/auth/src/user_provider.rs
@@ -57,6 +57,11 @@ pub trait UserProvider: Send + Sync {
        self.authorize(catalog, schema, &user_info).await?;
        Ok(user_info)
    }
+
+    /// Returns whether this user provider implementation is backed by an external system.
+    fn external(&self) -> bool {
+        false
+    }
 }

 fn load_credential_from_file(filepath: &str) -> Result<Option<HashMap<String, Vec<u8>>>> {
--- a/src/catalog/Cargo.toml
+++ b/src/catalog/Cargo.toml
@@ -22,6 +22,7 @@ common-config.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
 common-meta.workspace = true
+common-procedure.workspace = true
 common-query.workspace = true
 common-recordbatch.workspace = true
 common-runtime.workspace = true
--- a/src/catalog/src/error.rs
+++ b/src/catalog/src/error.rs
@@ -50,13 +50,20 @@ pub enum Error {
        source: BoxedError,
    },

-    #[snafu(display("Failed to list nodes in cluster: {source}"))]
+    #[snafu(display("Failed to list nodes in cluster"))]
    ListNodes {
        #[snafu(implicit)]
        location: Location,
        source: BoxedError,
    },

+    #[snafu(display("Failed to list region stats in cluster"))]
+    ListRegionStats {
+        #[snafu(implicit)]
+        location: Location,
+        source: BoxedError,
+    },
+
    #[snafu(display("Failed to list flows in catalog {catalog}"))]
    ListFlows {
        #[snafu(implicit)]
@@ -82,6 +89,32 @@ pub enum Error {
        location: Location,
    },

+    #[snafu(display("Failed to get information extension client"))]
+    GetInformationExtension {
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Failed to list procedures"))]
+    ListProcedures {
+        #[snafu(implicit)]
+        location: Location,
+        source: BoxedError,
+    },
+
+    #[snafu(display("Procedure id not found"))]
+    ProcedureIdNotFound {
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("convert proto data error"))]
+    ConvertProtoData {
+        #[snafu(implicit)]
+        location: Location,
+        source: BoxedError,
+    },
+
    #[snafu(display("Failed to re-compile script due to internal error"))]
    CompileScriptInternal {
        #[snafu(implicit)]
@@ -266,7 +299,9 @@ impl ErrorExt for Error {
            | Error::FindRegionRoutes { .. }
            | Error::CacheNotFound { .. }
            | Error::CastManager { .. }
-            | Error::Json { .. } => StatusCode::Unexpected,
+            | Error::Json { .. }
+            | Error::GetInformationExtension { .. }
+            | Error::ProcedureIdNotFound { .. } => StatusCode::Unexpected,

            Error::ViewPlanColumnsChanged { .. } => StatusCode::InvalidArguments,

@@ -283,7 +318,10 @@ impl ErrorExt for Error {
            | Error::ListNodes { source, .. }
            | Error::ListSchemas { source, .. }
            | Error::ListTables { source, .. }
-            | Error::ListFlows { source, .. } => source.status_code(),
+            | Error::ListFlows { source, .. }
+            | Error::ListProcedures { source, .. }
+            | Error::ListRegionStats { source, .. }
+            | Error::ConvertProtoData { source, .. } => source.status_code(),

            Error::CreateTable { source, .. } => source.status_code(),

--- a/src/catalog/src/kvbackend/manager.rs
+++ b/src/catalog/src/kvbackend/manager.rs
@@ -21,7 +21,6 @@ use common_catalog::consts::{
    DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID,
    PG_CATALOG_NAME,
 };
-use common_config::Mode;
 use common_error::ext::BoxedError;
 use common_meta::cache::{LayeredCacheRegistryRef, ViewInfoCacheRef};
 use common_meta::key::catalog_name::CatalogNameKey;
@@ -31,9 +30,9 @@ use common_meta::key::table_info::TableInfoValue;
 use common_meta::key::table_name::TableNameKey;
 use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
 use common_meta::kv_backend::KvBackendRef;
+use common_procedure::ProcedureManagerRef;
 use futures_util::stream::BoxStream;
 use futures_util::{StreamExt, TryStreamExt};
-use meta_client::client::MetaClient;
 use moka::sync::Cache;
 use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
 use session::context::{Channel, QueryContext};
@@ -49,7 +48,7 @@ use crate::error::{
    CacheNotFoundSnafu, GetTableCacheSnafu, InvalidTableInfoInCatalogSnafu, ListCatalogsSnafu,
    ListSchemasSnafu, ListTablesSnafu, Result, TableMetadataManagerSnafu,
 };
-use crate::information_schema::InformationSchemaProvider;
+use crate::information_schema::{InformationExtensionRef, InformationSchemaProvider};
 use crate::kvbackend::TableCacheRef;
 use crate::system_schema::pg_catalog::PGCatalogProvider;
 use crate::system_schema::SystemSchemaProvider;
@@ -62,27 +61,31 @@ use crate::CatalogManager;
 /// comes from `SystemCatalog`, which is static and read-only.
 #[derive(Clone)]
 pub struct KvBackendCatalogManager {
-    mode: Mode,
-    meta_client: Option<Arc<MetaClient>>,
+    /// Provides the extension methods for the `information_schema` tables
+    information_extension: InformationExtensionRef,
+    /// Manages partition rules.
    partition_manager: PartitionRuleManagerRef,
+    /// Manages table metadata.
    table_metadata_manager: TableMetadataManagerRef,
    /// A sub-CatalogManager that handles system tables
    system_catalog: SystemCatalog,
+    /// Cache registry for all caches.
    cache_registry: LayeredCacheRegistryRef,
+    /// Only available in `Standalone` mode.
+    procedure_manager: Option<ProcedureManagerRef>,
 }

 const CATALOG_CACHE_MAX_CAPACITY: u64 = 128;

 impl KvBackendCatalogManager {
    pub fn new(
-        mode: Mode,
-        meta_client: Option<Arc<MetaClient>>,
+        information_extension: InformationExtensionRef,
        backend: KvBackendRef,
        cache_registry: LayeredCacheRegistryRef,
+        procedure_manager: Option<ProcedureManagerRef>,
    ) -> Arc<Self> {
        Arc::new_cyclic(|me| Self {
-            mode,
-            meta_client,
+            information_extension,
            partition_manager: Arc::new(PartitionRuleManager::new(
                backend.clone(),
                cache_registry
@@ -106,23 +109,19 @@ impl KvBackendCatalogManager {
                backend,
            },
            cache_registry,
+            procedure_manager,
        })
    }

-    /// Returns the server running mode.
-    pub fn running_mode(&self) -> &Mode {
-        &self.mode
-    }
-
    pub fn view_info_cache(&self) -> Result<ViewInfoCacheRef> {
        self.cache_registry.get().context(CacheNotFoundSnafu {
            name: "view_info_cache",
        })
    }

-    /// Returns the `[MetaClient]`.
-    pub fn meta_client(&self) -> Option<Arc<MetaClient>> {
-        self.meta_client.clone()
+    /// Returns the [`InformationExtension`].
+    pub fn information_extension(&self) -> InformationExtensionRef {
+        self.information_extension.clone()
    }

    pub fn partition_manager(&self) -> PartitionRuleManagerRef {
@@ -132,6 +131,10 @@ impl KvBackendCatalogManager {
    pub fn table_metadata_manager_ref(&self) -> &TableMetadataManagerRef {
        &self.table_metadata_manager
    }
+
+    pub fn procedure_manager(&self) -> Option<ProcedureManagerRef> {
+        self.procedure_manager.clone()
+    }
 }

 #[async_trait::async_trait]
--- a/src/catalog/src/system_schema/information_schema.rs
+++ b/src/catalog/src/system_schema/information_schema.rs
@@ -18,7 +18,9 @@ pub mod flows;
 mod information_memory_table;
 pub mod key_column_usage;
 mod partitions;
+mod procedure_info;
 mod region_peers;
+mod region_statistics;
 mod runtime_metrics;
 pub mod schemata;
 mod table_constraints;
@@ -30,7 +32,11 @@ use std::collections::HashMap;
 use std::sync::{Arc, Weak};

 use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, INFORMATION_SCHEMA_NAME};
+use common_error::ext::ErrorExt;
+use common_meta::cluster::NodeInfo;
+use common_meta::datanode::RegionStat;
 use common_meta::key::flow::FlowMetadataManager;
+use common_procedure::ProcedureInfo;
 use common_recordbatch::SendableRecordBatchStream;
 use datatypes::schema::SchemaRef;
 use lazy_static::lazy_static;
@@ -43,7 +49,7 @@ use views::InformationSchemaViews;

 use self::columns::InformationSchemaColumns;
 use super::{SystemSchemaProviderInner, SystemTable, SystemTableRef};
-use crate::error::Result;
+use crate::error::{Error, Result};
 use crate::system_schema::information_schema::cluster_info::InformationSchemaClusterInfo;
 use crate::system_schema::information_schema::flows::InformationSchemaFlows;
 use crate::system_schema::information_schema::information_memory_table::get_schema_columns;
@@ -188,6 +194,16 @@ impl SystemSchemaProviderInner for InformationSchemaProvider {
                self.catalog_name.clone(),
                self.flow_metadata_manager.clone(),
            )) as _),
+            PROCEDURE_INFO => Some(
+                Arc::new(procedure_info::InformationSchemaProcedureInfo::new(
+                    self.catalog_manager.clone(),
+                )) as _,
+            ),
+            REGION_STATISTICS => Some(Arc::new(
+                region_statistics::InformationSchemaRegionStatistics::new(
+                    self.catalog_manager.clone(),
+                ),
+            ) as _),
            _ => None,
        }
    }
@@ -235,6 +251,14 @@ impl InformationSchemaProvider {
                CLUSTER_INFO.to_string(),
                self.build_table(CLUSTER_INFO).unwrap(),
            );
+            tables.insert(
+                PROCEDURE_INFO.to_string(),
+                self.build_table(PROCEDURE_INFO).unwrap(),
+            );
+            tables.insert(
+                REGION_STATISTICS.to_string(),
+                self.build_table(REGION_STATISTICS).unwrap(),
+            );
        }

        tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap());
@@ -250,7 +274,6 @@ impl InformationSchemaProvider {
            self.build_table(TABLE_CONSTRAINTS).unwrap(),
        );
        tables.insert(FLOWS.to_string(), self.build_table(FLOWS).unwrap());
-
        // Add memory tables
        for name in MEMORY_TABLES.iter() {
            tables.insert((*name).to_string(), self.build_table(name).expect(name));
@@ -299,3 +322,39 @@ where
        InformationTable::to_stream(self, request)
    }
 }
+
+pub type InformationExtensionRef = Arc<dyn InformationExtension<Error = Error> + Send + Sync>;
+
+/// The `InformationExtension` trait provides the extension methods for the `information_schema` tables.
+#[async_trait::async_trait]
+pub trait InformationExtension {
+    type Error: ErrorExt;
+
+    /// Gets the nodes information.
+    async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error>;
+
+    /// Gets the procedures information.
+    async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error>;
+
+    /// Gets the region statistics.
+    async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error>;
+}
+
+pub struct NoopInformationExtension;
+
+#[async_trait::async_trait]
+impl InformationExtension for NoopInformationExtension {
+    type Error = Error;
+
+    async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
+        Ok(vec![])
+    }
+
+    async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
+        Ok(vec![])
+    }
+
+    async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
+        Ok(vec![])
+    }
+}
--- a/src/catalog/src/system_schema/information_schema/cluster_info.rs
+++ b/src/catalog/src/system_schema/information_schema/cluster_info.rs
@@ -17,13 +17,10 @@ use std::time::Duration;

 use arrow_schema::SchemaRef as ArrowSchemaRef;
 use common_catalog::consts::INFORMATION_SCHEMA_CLUSTER_INFO_TABLE_ID;
-use common_config::Mode;
 use common_error::ext::BoxedError;
-use common_meta::cluster::{ClusterInfo, NodeInfo, NodeStatus};
-use common_meta::peer::Peer;
+use common_meta::cluster::NodeInfo;
 use common_recordbatch::adapter::RecordBatchStreamAdapter;
 use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
-use common_telemetry::warn;
 use common_time::timestamp::Timestamp;
 use datafusion::execution::TaskContext;
 use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
@@ -40,7 +37,7 @@ use snafu::ResultExt;
 use store_api::storage::{ScanRequest, TableId};

 use super::CLUSTER_INFO;
-use crate::error::{CreateRecordBatchSnafu, InternalSnafu, ListNodesSnafu, Result};
+use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
 use crate::system_schema::information_schema::{InformationTable, Predicates};
 use crate::system_schema::utils;
 use crate::CatalogManager;
@@ -70,7 +67,6 @@ const INIT_CAPACITY: usize = 42;
 pub(super) struct InformationSchemaClusterInfo {
    schema: SchemaRef,
    catalog_manager: Weak<dyn CatalogManager>,
-    start_time_ms: u64,
 }

 impl InformationSchemaClusterInfo {
@@ -78,7 +74,6 @@ impl InformationSchemaClusterInfo {
        Self {
            schema: Self::schema(),
            catalog_manager,
-            start_time_ms: common_time::util::current_time_millis() as u64,
        }
    }

@@ -100,11 +95,7 @@ impl InformationSchemaClusterInfo {
    }

    fn builder(&self) -> InformationSchemaClusterInfoBuilder {
-        InformationSchemaClusterInfoBuilder::new(
-            self.schema.clone(),
-            self.catalog_manager.clone(),
-            self.start_time_ms,
-        )
+        InformationSchemaClusterInfoBuilder::new(self.schema.clone(), self.catalog_manager.clone())
    }
 }

@@ -144,7 +135,6 @@ impl InformationTable for InformationSchemaClusterInfo {

 struct InformationSchemaClusterInfoBuilder {
    schema: SchemaRef,
-    start_time_ms: u64,
    catalog_manager: Weak<dyn CatalogManager>,

    peer_ids: Int64VectorBuilder,
@@ -158,11 +148,7 @@ struct InformationSchemaClusterInfoBuilder {
 }

 impl InformationSchemaClusterInfoBuilder {
-    fn new(
-        schema: SchemaRef,
-        catalog_manager: Weak<dyn CatalogManager>,
-        start_time_ms: u64,
-    ) -> Self {
+    fn new(schema: SchemaRef, catalog_manager: Weak<dyn CatalogManager>) -> Self {
        Self {
            schema,
            catalog_manager,
@@ -174,56 +160,17 @@ impl InformationSchemaClusterInfoBuilder {
            start_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
            uptimes: StringVectorBuilder::with_capacity(INIT_CAPACITY),
            active_times: StringVectorBuilder::with_capacity(INIT_CAPACITY),
-            start_time_ms,
        }
    }

    /// Construct the `information_schema.cluster_info` virtual table
    async fn make_cluster_info(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
        let predicates = Predicates::from_scan_request(&request);
-        let mode = utils::running_mode(&self.catalog_manager)?.unwrap_or(Mode::Standalone);
-
-        match mode {
-            Mode::Standalone => {
-                let build_info = common_version::build_info();
-
-                self.add_node_info(
-                    &predicates,
-                    NodeInfo {
-                        // For the standalone:
-                        // - id always 0
-                        // - empty string for peer_addr
-                        peer: Peer {
-                            id: 0,
-                            addr: "".to_string(),
-                        },
-                        last_activity_ts: -1,
-                        status: NodeStatus::Standalone,
-                        version: build_info.version.to_string(),
-                        git_commit: build_info.commit_short.to_string(),
-                        // Use `self.start_time_ms` instead.
-                        // It's not precise but enough.
-                        start_time_ms: self.start_time_ms,
-                    },
-                );
-            }
-            Mode::Distributed => {
-                if let Some(meta_client) = utils::meta_client(&self.catalog_manager)? {
-                    let node_infos = meta_client
-                        .list_nodes(None)
-                        .await
-                        .map_err(BoxedError::new)
-                        .context(ListNodesSnafu)?;
-
-                    for node_info in node_infos {
-                        self.add_node_info(&predicates, node_info);
-                    }
-                } else {
-                    warn!("Could not find meta client in distributed mode.");
-                }
-            }
+        let information_extension = utils::information_extension(&self.catalog_manager)?;
+        let node_infos = information_extension.nodes().await?;
+        for node_info in node_infos {
+            self.add_node_info(&predicates, node_info);
        }
-
        self.finish()
    }

--- a/src/catalog/src/system_schema/information_schema/procedure_info.rs
+++ b/src/catalog/src/system_schema/information_schema/procedure_info.rs
@@ -0,0 +1,241 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::{Arc, Weak};
+
+use arrow_schema::SchemaRef as ArrowSchemaRef;
+use common_catalog::consts::INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID;
+use common_error::ext::BoxedError;
+use common_procedure::ProcedureInfo;
+use common_recordbatch::adapter::RecordBatchStreamAdapter;
+use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
+use common_time::timestamp::Timestamp;
+use datafusion::execution::TaskContext;
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
+use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
+use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
+use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
+use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
+use datatypes::timestamp::TimestampMillisecond;
+use datatypes::value::Value;
+use datatypes::vectors::{StringVectorBuilder, TimestampMillisecondVectorBuilder};
+use snafu::ResultExt;
+use store_api::storage::{ScanRequest, TableId};
+
+use super::PROCEDURE_INFO;
+use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
+use crate::system_schema::information_schema::{InformationTable, Predicates};
+use crate::system_schema::utils;
+use crate::CatalogManager;
+
+const PROCEDURE_ID: &str = "procedure_id";
+const PROCEDURE_TYPE: &str = "procedure_type";
+const START_TIME: &str = "start_time";
+const END_TIME: &str = "end_time";
+const STATUS: &str = "status";
+const LOCK_KEYS: &str = "lock_keys";
+
+const INIT_CAPACITY: usize = 42;
+
+/// The `PROCEDURE_INFO` table provides information about the current procedures in the cluster.
+///
+/// - `procedure_id`: the unique identifier of the procedure.
+/// - `procedure_type`: the type name of the procedure.
+/// - `start_time`: the starting execution time of the procedure.
+/// - `end_time`: the ending execution time of the procedure.
+/// - `status`: the status of the procedure.
+/// - `lock_keys`: the lock keys of the procedure.
+///
+pub(super) struct InformationSchemaProcedureInfo {
+    schema: SchemaRef,
+    catalog_manager: Weak<dyn CatalogManager>,
+}
+
+impl InformationSchemaProcedureInfo {
+    pub(super) fn new(catalog_manager: Weak<dyn CatalogManager>) -> Self {
+        Self {
+            schema: Self::schema(),
+            catalog_manager,
+        }
+    }
+
+    pub(crate) fn schema() -> SchemaRef {
+        Arc::new(Schema::new(vec![
+            ColumnSchema::new(PROCEDURE_ID, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(PROCEDURE_TYPE, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(
+                START_TIME,
+                ConcreteDataType::timestamp_millisecond_datatype(),
+                true,
+            ),
+            ColumnSchema::new(
+                END_TIME,
+                ConcreteDataType::timestamp_millisecond_datatype(),
+                true,
+            ),
+            ColumnSchema::new(STATUS, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(LOCK_KEYS, ConcreteDataType::string_datatype(), true),
+        ]))
+    }
+
+    fn builder(&self) -> InformationSchemaProcedureInfoBuilder {
+        InformationSchemaProcedureInfoBuilder::new(
+            self.schema.clone(),
+            self.catalog_manager.clone(),
+        )
+    }
+}
+
+impl InformationTable for InformationSchemaProcedureInfo {
+    fn table_id(&self) -> TableId {
+        INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID
+    }
+
+    fn table_name(&self) -> &'static str {
+        PROCEDURE_INFO
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
+        let schema = self.schema.arrow_schema().clone();
+        let mut builder = self.builder();
+        let stream = Box::pin(DfRecordBatchStreamAdapter::new(
+            schema,
+            futures::stream::once(async move {
+                builder
+                    .make_procedure_info(Some(request))
+                    .await
+                    .map(|x| x.into_df_record_batch())
+                    .map_err(Into::into)
+            }),
+        ));
+        Ok(Box::pin(
+            RecordBatchStreamAdapter::try_new(stream)
+                .map_err(BoxedError::new)
+                .context(InternalSnafu)?,
+        ))
+    }
+}
+
+struct InformationSchemaProcedureInfoBuilder {
+    schema: SchemaRef,
+    catalog_manager: Weak<dyn CatalogManager>,
+
+    procedure_ids: StringVectorBuilder,
+    procedure_types: StringVectorBuilder,
+    start_times: TimestampMillisecondVectorBuilder,
+    end_times: TimestampMillisecondVectorBuilder,
+    statuses: StringVectorBuilder,
+    lock_keys: StringVectorBuilder,
+}
+
+impl InformationSchemaProcedureInfoBuilder {
+    fn new(schema: SchemaRef, catalog_manager: Weak<dyn CatalogManager>) -> Self {
+        Self {
+            schema,
+            catalog_manager,
+            procedure_ids: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            procedure_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            start_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
+            end_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
+            statuses: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            lock_keys: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+        }
+    }
+
+    /// Construct the `information_schema.procedure_info` virtual table
+    async fn make_procedure_info(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
+        let predicates = Predicates::from_scan_request(&request);
+        let information_extension = utils::information_extension(&self.catalog_manager)?;
+        let procedures = information_extension.procedures().await?;
+        for (status, procedure_info) in procedures {
+            self.add_procedure(&predicates, status, procedure_info);
+        }
+        self.finish()
+    }
+
+    fn add_procedure(
+        &mut self,
+        predicates: &Predicates,
+        status: String,
+        procedure_info: ProcedureInfo,
+    ) {
+        let ProcedureInfo {
+            id,
+            type_name,
+            start_time_ms,
+            end_time_ms,
+            lock_keys,
+            ..
+        } = procedure_info;
+        let pid = id.to_string();
+        let start_time = TimestampMillisecond(Timestamp::new_millisecond(start_time_ms));
+        let end_time = TimestampMillisecond(Timestamp::new_millisecond(end_time_ms));
+        let lock_keys = lock_keys.join(",");
+
+        let row = [
+            (PROCEDURE_ID, &Value::from(pid.clone())),
+            (PROCEDURE_TYPE, &Value::from(type_name.clone())),
+            (START_TIME, &Value::from(start_time)),
+            (END_TIME, &Value::from(end_time)),
+            (STATUS, &Value::from(status.clone())),
+            (LOCK_KEYS, &Value::from(lock_keys.clone())),
+        ];
+        if !predicates.eval(&row) {
+            return;
+        }
+        self.procedure_ids.push(Some(&pid));
+        self.procedure_types.push(Some(&type_name));
+        self.start_times.push(Some(start_time));
+        self.end_times.push(Some(end_time));
+        self.statuses.push(Some(&status));
+        self.lock_keys.push(Some(&lock_keys));
+    }
+
+    fn finish(&mut self) -> Result<RecordBatch> {
+        let columns: Vec<VectorRef> = vec![
+            Arc::new(self.procedure_ids.finish()),
+            Arc::new(self.procedure_types.finish()),
+            Arc::new(self.start_times.finish()),
+            Arc::new(self.end_times.finish()),
+            Arc::new(self.statuses.finish()),
+            Arc::new(self.lock_keys.finish()),
+        ];
+        RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
+    }
+}
+
+impl DfPartitionStream for InformationSchemaProcedureInfo {
+    fn schema(&self) -> &ArrowSchemaRef {
+        self.schema.arrow_schema()
+    }
+
+    fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
+        let schema = self.schema.arrow_schema().clone();
+        let mut builder = self.builder();
+        Box::pin(DfRecordBatchStreamAdapter::new(
+            schema,
+            futures::stream::once(async move {
+                builder
+                    .make_procedure_info(None)
+                    .await
+                    .map(|x| x.into_df_record_batch())
+                    .map_err(Into::into)
+            }),
+        ))
+    }
+}
--- a/src/catalog/src/system_schema/information_schema/region_peers.rs
+++ b/src/catalog/src/system_schema/information_schema/region_peers.rs
@@ -224,8 +224,8 @@ impl InformationSchemaRegionPeersBuilder {
            let region_id = RegionId::new(table_id, route.region.id.region_number()).as_u64();
            let peer_id = route.leader_peer.clone().map(|p| p.id);
            let peer_addr = route.leader_peer.clone().map(|p| p.addr);
-            let status = if let Some(status) = route.leader_status {
-                Some(status.as_ref().to_string())
+            let state = if let Some(state) = route.leader_state {
+                Some(state.as_ref().to_string())
            } else {
                // Alive by default
                Some("ALIVE".to_string())
@@ -242,7 +242,7 @@ impl InformationSchemaRegionPeersBuilder {
            self.peer_ids.push(peer_id);
            self.peer_addrs.push(peer_addr.as_deref());
            self.is_leaders.push(Some("Yes"));
-            self.statuses.push(status.as_deref());
+            self.statuses.push(state.as_deref());
            self.down_seconds
                .push(route.leader_down_millis().map(|m| m / 1000));
        }
--- a/src/catalog/src/system_schema/information_schema/region_statistics.rs
+++ b/src/catalog/src/system_schema/information_schema/region_statistics.rs
@@ -0,0 +1,237 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::{Arc, Weak};
+
+use arrow_schema::SchemaRef as ArrowSchemaRef;
+use common_catalog::consts::INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID;
+use common_error::ext::BoxedError;
+use common_meta::datanode::RegionStat;
+use common_recordbatch::adapter::RecordBatchStreamAdapter;
+use common_recordbatch::{DfSendableRecordBatchStream, RecordBatch, SendableRecordBatchStream};
+use datafusion::execution::TaskContext;
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
+use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
+use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
+use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
+use datatypes::value::Value;
+use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder};
+use snafu::ResultExt;
+use store_api::storage::{ScanRequest, TableId};
+
+use super::{InformationTable, REGION_STATISTICS};
+use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
+use crate::information_schema::Predicates;
+use crate::system_schema::utils;
+use crate::CatalogManager;
+
+const REGION_ID: &str = "region_id";
+const TABLE_ID: &str = "table_id";
+const REGION_NUMBER: &str = "region_number";
+const MEMTABLE_SIZE: &str = "memtable_size";
+const MANIFEST_SIZE: &str = "manifest_size";
+const SST_SIZE: &str = "sst_size";
+const ENGINE: &str = "engine";
+const REGION_ROLE: &str = "region_role";
+
+const INIT_CAPACITY: usize = 42;
+
+/// The `REGION_STATISTICS` table provides statistics about regions, including the following fields:
+///
+/// - `region_id`: The region id.
+/// - `table_id`: The table id.
+/// - `region_number`: The region number.
+/// - `memtable_size`: The memtable size in bytes.
+/// - `manifest_size`: The manifest size in bytes.
+/// - `sst_size`: The sst size in bytes.
+/// - `engine`: The engine type.
+/// - `region_role`: The region role.
+///
+pub(super) struct InformationSchemaRegionStatistics {
+    schema: SchemaRef,
+    catalog_manager: Weak<dyn CatalogManager>,
+}
+
+impl InformationSchemaRegionStatistics {
+    pub(super) fn new(catalog_manager: Weak<dyn CatalogManager>) -> Self {
+        Self {
+            schema: Self::schema(),
+            catalog_manager,
+        }
+    }
+
+    pub(crate) fn schema() -> SchemaRef {
+        Arc::new(Schema::new(vec![
+            ColumnSchema::new(REGION_ID, ConcreteDataType::uint64_datatype(), false),
+            ColumnSchema::new(TABLE_ID, ConcreteDataType::uint32_datatype(), false),
+            ColumnSchema::new(REGION_NUMBER, ConcreteDataType::uint32_datatype(), false),
+            ColumnSchema::new(MEMTABLE_SIZE, ConcreteDataType::uint64_datatype(), true),
+            ColumnSchema::new(MANIFEST_SIZE, ConcreteDataType::uint64_datatype(), true),
+            ColumnSchema::new(SST_SIZE, ConcreteDataType::uint64_datatype(), true),
+            ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
+            ColumnSchema::new(REGION_ROLE, ConcreteDataType::string_datatype(), true),
+        ]))
+    }
+
+    fn builder(&self) -> InformationSchemaRegionStatisticsBuilder {
+        InformationSchemaRegionStatisticsBuilder::new(
+            self.schema.clone(),
+            self.catalog_manager.clone(),
+        )
+    }
+}
+
+impl InformationTable for InformationSchemaRegionStatistics {
+    fn table_id(&self) -> TableId {
+        INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID
+    }
+
+    fn table_name(&self) -> &'static str {
+        REGION_STATISTICS
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
+        let schema = self.schema.arrow_schema().clone();
+        let mut builder = self.builder();
+
+        let stream = Box::pin(DfRecordBatchStreamAdapter::new(
+            schema,
+            futures::stream::once(async move {
+                builder
+                    .make_region_statistics(Some(request))
+                    .await
+                    .map(|x| x.into_df_record_batch())
+                    .map_err(Into::into)
+            }),
+        ));
+
+        Ok(Box::pin(
+            RecordBatchStreamAdapter::try_new(stream)
+                .map_err(BoxedError::new)
+                .context(InternalSnafu)?,
+        ))
+    }
+}
+
+struct InformationSchemaRegionStatisticsBuilder {
+    schema: SchemaRef,
+    catalog_manager: Weak<dyn CatalogManager>,
+
+    region_ids: UInt64VectorBuilder,
+    table_ids: UInt32VectorBuilder,
+    region_numbers: UInt32VectorBuilder,
+    memtable_sizes: UInt64VectorBuilder,
+    manifest_sizes: UInt64VectorBuilder,
+    sst_sizes: UInt64VectorBuilder,
+    engines: StringVectorBuilder,
+    region_roles: StringVectorBuilder,
+}
+
+impl InformationSchemaRegionStatisticsBuilder {
+    fn new(schema: SchemaRef, catalog_manager: Weak<dyn CatalogManager>) -> Self {
+        Self {
+            schema,
+            catalog_manager,
+            region_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
+            table_ids: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
+            region_numbers: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
+            memtable_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
+            manifest_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
+            sst_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
+            engines: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            region_roles: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+        }
+    }
+
+    /// Construct the `information_schema.region_statistics` virtual table.
+    async fn make_region_statistics(
+        &mut self,
+        request: Option<ScanRequest>,
+    ) -> Result<RecordBatch> {
+        let predicates = Predicates::from_scan_request(&request);
+        let information_extension = utils::information_extension(&self.catalog_manager)?;
+        let region_stats = information_extension.region_stats().await?;
+        for region_stat in region_stats {
+            self.add_region_statistic(&predicates, region_stat);
+        }
+        self.finish()
+    }
+
+    fn add_region_statistic(&mut self, predicate: &Predicates, region_stat: RegionStat) {
+        let row = [
+            (REGION_ID, &Value::from(region_stat.id.as_u64())),
+            (TABLE_ID, &Value::from(region_stat.id.table_id())),
+            (REGION_NUMBER, &Value::from(region_stat.id.region_number())),
+            (MEMTABLE_SIZE, &Value::from(region_stat.memtable_size)),
+            (MANIFEST_SIZE, &Value::from(region_stat.manifest_size)),
+            (SST_SIZE, &Value::from(region_stat.sst_size)),
+            (ENGINE, &Value::from(region_stat.engine.as_str())),
+            (REGION_ROLE, &Value::from(region_stat.role.to_string())),
+        ];
+
+        if !predicate.eval(&row) {
+            return;
+        }
+
+        self.region_ids.push(Some(region_stat.id.as_u64()));
+        self.table_ids.push(Some(region_stat.id.table_id()));
+        self.region_numbers
+            .push(Some(region_stat.id.region_number()));
+        self.memtable_sizes.push(Some(region_stat.memtable_size));
+        self.manifest_sizes.push(Some(region_stat.manifest_size));
+        self.sst_sizes.push(Some(region_stat.sst_size));
+        self.engines.push(Some(&region_stat.engine));
+        self.region_roles.push(Some(&region_stat.role.to_string()));
+    }
+
+    fn finish(&mut self) -> Result<RecordBatch> {
+        let columns: Vec<VectorRef> = vec![
+            Arc::new(self.region_ids.finish()),
+            Arc::new(self.table_ids.finish()),
+            Arc::new(self.region_numbers.finish()),
+            Arc::new(self.memtable_sizes.finish()),
+            Arc::new(self.manifest_sizes.finish()),
+            Arc::new(self.sst_sizes.finish()),
+            Arc::new(self.engines.finish()),
+            Arc::new(self.region_roles.finish()),
+        ];
+
+        RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
+    }
+}
+
+impl DfPartitionStream for InformationSchemaRegionStatistics {
+    fn schema(&self) -> &ArrowSchemaRef {
+        self.schema.arrow_schema()
+    }
+
+    fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
+        let schema = self.schema.arrow_schema().clone();
+        let mut builder = self.builder();
+        Box::pin(DfRecordBatchStreamAdapter::new(
+            schema,
+            futures::stream::once(async move {
+                builder
+                    .make_region_statistics(None)
+                    .await
+                    .map(|x| x.into_df_record_batch())
+                    .map_err(Into::into)
+            }),
+        ))
+    }
+}
--- a/src/catalog/src/system_schema/information_schema/table_names.rs
+++ b/src/catalog/src/system_schema/information_schema/table_names.rs
@@ -45,3 +45,5 @@ pub const TABLE_CONSTRAINTS: &str = "table_constraints";
 pub const CLUSTER_INFO: &str = "cluster_info";
 pub const VIEWS: &str = "views";
 pub const FLOWS: &str = "flows";
+pub const PROCEDURE_INFO: &str = "procedure_info";
+pub const REGION_STATISTICS: &str = "region_statistics";
--- a/src/catalog/src/system_schema/utils.rs
+++ b/src/catalog/src/system_schema/utils.rs
@@ -12,47 +12,33 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-pub mod tables;
+use std::sync::Weak;

-use std::sync::{Arc, Weak};
-
-use common_config::Mode;
 use common_meta::key::TableMetadataManagerRef;
-use meta_client::client::MetaClient;
 use snafu::OptionExt;

-use crate::error::{Result, UpgradeWeakCatalogManagerRefSnafu};
+use crate::error::{GetInformationExtensionSnafu, Result, UpgradeWeakCatalogManagerRefSnafu};
+use crate::information_schema::InformationExtensionRef;
 use crate::kvbackend::KvBackendCatalogManager;
 use crate::CatalogManager;

-/// Try to get the server running mode from `[CatalogManager]` weak reference.
-pub fn running_mode(catalog_manager: &Weak<dyn CatalogManager>) -> Result<Option<Mode>> {
+pub mod tables;
+
+/// Try to get the `[InformationExtension]` from `[CatalogManager]` weak reference.
+pub fn information_extension(
+    catalog_manager: &Weak<dyn CatalogManager>,
+) -> Result<InformationExtensionRef> {
    let catalog_manager = catalog_manager
        .upgrade()
        .context(UpgradeWeakCatalogManagerRefSnafu)?;

-    Ok(catalog_manager
+    let information_extension = catalog_manager
        .as_any()
        .downcast_ref::<KvBackendCatalogManager>()
-        .map(|manager| manager.running_mode())
-        .copied())
-}
+        .map(|manager| manager.information_extension())
+        .context(GetInformationExtensionSnafu)?;

-/// Try to get the `[MetaClient]` from `[CatalogManager]` weak reference.
-pub fn meta_client(catalog_manager: &Weak<dyn CatalogManager>) -> Result<Option<Arc<MetaClient>>> {
-    let catalog_manager = catalog_manager
-        .upgrade()
-        .context(UpgradeWeakCatalogManagerRefSnafu)?;
-
-    let meta_client = match catalog_manager
-        .as_any()
-        .downcast_ref::<KvBackendCatalogManager>()
-    {
-        None => None,
-        Some(manager) => manager.meta_client(),
-    };
-
-    Ok(meta_client)
+    Ok(information_extension)
 }

 /// Try to get the `[TableMetadataManagerRef]` from `[CatalogManager]` weak reference.
--- a/src/catalog/src/table_source.rs
+++ b/src/catalog/src/table_source.rs
@@ -259,7 +259,6 @@ mod tests {

    use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
    use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
-    use common_config::Mode;
    use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
    use common_meta::key::TableMetadataManager;
    use common_meta::kv_backend::memory::MemoryKvBackend;
@@ -269,6 +268,8 @@ mod tests {
    use datafusion::logical_expr::builder::LogicalTableSource;
    use datafusion::logical_expr::{col, lit, LogicalPlan, LogicalPlanBuilder};

+    use crate::information_schema::NoopInformationExtension;
+
    struct MockDecoder;
    impl MockDecoder {
        pub fn arc() -> Arc<Self> {
@@ -323,10 +324,10 @@ mod tests {
        );

        let catalog_manager = KvBackendCatalogManager::new(
-            Mode::Standalone,
-            None,
+            Arc::new(NoopInformationExtension),
            backend.clone(),
            layered_cache_registry,
+            None,
        );
        let table_metadata_manager = TableMetadataManager::new(backend);
        let mut view_info = common_meta::key::test_utils::new_test_table_info(1024, vec![]);
--- a/src/cmd/Cargo.toml
+++ b/src/cmd/Cargo.toml
@@ -10,7 +10,7 @@ name = "greptime"
 path = "src/bin/greptime.rs"

 [features]
-default = ["python"]
+default = ["python", "servers/pprof", "servers/mem-prof"]
 tokio-console = ["common-telemetry/tokio-console"]
 python = ["frontend/python"]

--- a/src/cmd/src/cli/bench.rs
+++ b/src/cmd/src/cli/bench.rs
@@ -158,7 +158,7 @@ fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
                addr: String::new(),
            }),
            follower_peers: vec![],
-            leader_status: None,
+            leader_state: None,
            leader_down_since: None,
        });
    }
--- a/src/cmd/src/cli/repl.rs
+++ b/src/cmd/src/cli/repl.rs
@@ -35,7 +35,6 @@ use either::Either;
 use meta_client::client::MetaClientBuilder;
 use query::datafusion::DatafusionQueryEngine;
 use query::parser::QueryLanguageParser;
-use query::plan::LogicalPlan;
 use query::query_engine::{DefaultSerializer, QueryEngineState};
 use query::QueryEngine;
 use rustyline::error::ReadlineError;
@@ -47,12 +46,12 @@ use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
 use crate::cli::cmd::ReplCommand;
 use crate::cli::helper::RustylineHelper;
 use crate::cli::AttachCommand;
-use crate::error;
 use crate::error::{
    CollectRecordBatchesSnafu, ParseSqlSnafu, PlanStatementSnafu, PrettyPrintRecordBatchesSnafu,
    ReadlineSnafu, ReplCreationSnafu, RequestDatabaseSnafu, Result, StartMetaClientSnafu,
    SubstraitEncodeLogicalPlanSnafu,
 };
+use crate::{error, DistributedInformationExtension};

 /// Captures the state of the repl, gathers commands and executes them one by one
 pub struct Repl {
@@ -179,7 +178,7 @@ impl Repl {
                .await
                .context(PlanStatementSnafu)?;

-            let LogicalPlan::DfPlan(plan) = query_engine
+            let plan = query_engine
                .optimize(&query_engine.engine_context(query_ctx), &plan)
                .context(PlanStatementSnafu)?;

@@ -276,11 +275,12 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
        .build(),
    );

+    let information_extension = Arc::new(DistributedInformationExtension::new(meta_client.clone()));
    let catalog_manager = KvBackendCatalogManager::new(
-        Mode::Distributed,
-        Some(meta_client.clone()),
+        information_extension,
        cached_meta_backend.clone(),
        layered_cache_registry,
+        None,
    );
    let plugins: Plugins = Default::default();
    let state = Arc::new(QueryEngineState::new(
--- a/src/cmd/src/flownode.rs
+++ b/src/cmd/src/flownode.rs
@@ -41,7 +41,7 @@ use crate::error::{
    MissingConfigSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
 };
 use crate::options::{GlobalOptions, GreptimeOptions};
-use crate::{log_versions, App};
+use crate::{log_versions, App, DistributedInformationExtension};

 pub const APP_NAME: &str = "greptime-flownode";

@@ -269,11 +269,13 @@ impl StartCommand {
            .build(),
        );

+        let information_extension =
+            Arc::new(DistributedInformationExtension::new(meta_client.clone()));
        let catalog_manager = KvBackendCatalogManager::new(
-            opts.mode,
-            Some(meta_client.clone()),
+            information_extension,
            cached_meta_backend.clone(),
            layered_cache_registry.clone(),
+            None,
        );

        let table_metadata_manager =
--- a/src/cmd/src/frontend.rs
+++ b/src/cmd/src/frontend.rs
@@ -36,8 +36,8 @@ use frontend::instance::builder::FrontendBuilder;
 use frontend::instance::{FrontendInstance, Instance as FeInstance};
 use frontend::server::Services;
 use meta_client::{MetaClientOptions, MetaClientType};
+use query::stats::StatementStatistics;
 use servers::tls::{TlsMode, TlsOption};
-use servers::Mode;
 use snafu::{OptionExt, ResultExt};
 use tracing_appender::non_blocking::WorkerGuard;

@@ -46,7 +46,7 @@ use crate::error::{
    Result, StartFrontendSnafu,
 };
 use crate::options::{GlobalOptions, GreptimeOptions};
-use crate::{log_versions, App};
+use crate::{log_versions, App, DistributedInformationExtension};

 type FrontendOptions = GreptimeOptions<frontend::frontend::FrontendOptions>;

@@ -315,11 +315,13 @@ impl StartCommand {
            .build(),
        );

+        let information_extension =
+            Arc::new(DistributedInformationExtension::new(meta_client.clone()));
        let catalog_manager = KvBackendCatalogManager::new(
-            Mode::Distributed,
-            Some(meta_client.clone()),
+            information_extension,
            cached_meta_backend.clone(),
            layered_cache_registry.clone(),
+            None,
        );

        let executor = HandlerGroupExecutor::new(vec![
@@ -351,6 +353,7 @@ impl StartCommand {
            catalog_manager,
            Arc::new(client),
            meta_client,
+            StatementStatistics::new(opts.logging.slow_query.clone()),
        )
        .with_plugin(plugins.clone())
        .with_local_cache_invalidator(layered_cache_registry)
--- a/src/cmd/src/lib.rs
+++ b/src/cmd/src/lib.rs
@@ -15,7 +15,17 @@
 #![feature(assert_matches, let_chains)]

 use async_trait::async_trait;
+use catalog::information_schema::InformationExtension;
+use client::api::v1::meta::ProcedureStatus;
+use common_error::ext::BoxedError;
+use common_meta::cluster::{ClusterInfo, NodeInfo};
+use common_meta::datanode::RegionStat;
+use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
+use common_meta::rpc::procedure;
+use common_procedure::{ProcedureInfo, ProcedureState};
 use common_telemetry::{error, info};
+use meta_client::MetaClientRef;
+use snafu::ResultExt;

 use crate::error::Result;

@@ -94,3 +104,69 @@ fn log_env_flags() {
        info!("argument: {}", argument);
    }
 }
+
+pub struct DistributedInformationExtension {
+    meta_client: MetaClientRef,
+}
+
+impl DistributedInformationExtension {
+    pub fn new(meta_client: MetaClientRef) -> Self {
+        Self { meta_client }
+    }
+}
+
+#[async_trait::async_trait]
+impl InformationExtension for DistributedInformationExtension {
+    type Error = catalog::error::Error;
+
+    async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
+        self.meta_client
+            .list_nodes(None)
+            .await
+            .map_err(BoxedError::new)
+            .context(catalog::error::ListNodesSnafu)
+    }
+
+    async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
+        let procedures = self
+            .meta_client
+            .list_procedures(&ExecutorContext::default())
+            .await
+            .map_err(BoxedError::new)
+            .context(catalog::error::ListProceduresSnafu)?
+            .procedures;
+        let mut result = Vec::with_capacity(procedures.len());
+        for procedure in procedures {
+            let pid = match procedure.id {
+                Some(pid) => pid,
+                None => return catalog::error::ProcedureIdNotFoundSnafu {}.fail(),
+            };
+            let pid = procedure::pb_pid_to_pid(&pid)
+                .map_err(BoxedError::new)
+                .context(catalog::error::ConvertProtoDataSnafu)?;
+            let status = ProcedureStatus::try_from(procedure.status)
+                .map(|v| v.as_str_name())
+                .unwrap_or("Unknown")
+                .to_string();
+            let procedure_info = ProcedureInfo {
+                id: pid,
+                type_name: procedure.type_name,
+                start_time_ms: procedure.start_time_ms,
+                end_time_ms: procedure.end_time_ms,
+                state: ProcedureState::Running,
+                lock_keys: procedure.lock_keys,
+            };
+            result.push((status, procedure_info));
+        }
+
+        Ok(result)
+    }
+
+    async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
+        self.meta_client
+            .list_region_stats()
+            .await
+            .map_err(BoxedError::new)
+            .context(catalog::error::ListRegionStatsSnafu)
+    }
+}
--- a/src/cmd/src/metasrv.rs
+++ b/src/cmd/src/metasrv.rs
@@ -48,6 +48,10 @@ impl Instance {
            _guard: guard,
        }
    }
+
+    /// Returns a shared reference to the wrapped [`MetasrvInstance`].
+    pub fn get_inner(&self) -> &MetasrvInstance {
+        let Self { instance, .. } = self;
+        instance
+    }
 }

 #[async_trait]
@@ -86,6 +90,14 @@ impl Command {
    pub fn load_options(&self, global_options: &GlobalOptions) -> Result<MetasrvOptions> {
        self.subcmd.load_options(global_options)
    }
+
+    /// Returns the `config_file` option of the selected subcommand.
+    pub fn config_file(&self) -> &Option<String> {
+        let subcmd = &self.subcmd;
+        subcmd.config_file()
+    }
+
+    /// Returns the `env_prefix` value of the selected subcommand.
+    pub fn env_prefix(&self) -> &String {
+        let subcmd = &self.subcmd;
+        subcmd.env_prefix()
+    }
 }

 #[derive(Parser)]
@@ -105,6 +117,18 @@ impl SubCommand {
            SubCommand::Start(cmd) => cmd.load_options(global_options),
        }
    }
+
+    /// Borrows the `config_file` field of the wrapped start command.
+    fn config_file(&self) -> &Option<String> {
+        match self {
+            Self::Start(start) => &start.config_file,
+        }
+    }
+
+    /// Borrows the `env_prefix` field of the wrapped start command.
+    fn env_prefix(&self) -> &String {
+        match self {
+            Self::Start(start) => &start.env_prefix,
+        }
+    }
 }

 #[derive(Debug, Default, Parser)]
--- a/src/cmd/src/standalone.rs
+++ b/src/cmd/src/standalone.rs
@@ -17,14 +17,18 @@ use std::{fs, path};

 use async_trait::async_trait;
 use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
+use catalog::information_schema::InformationExtension;
 use catalog::kvbackend::KvBackendCatalogManager;
 use clap::Parser;
+use client::api::v1::meta::RegionRole;
 use common_base::Plugins;
 use common_catalog::consts::{MIN_USER_FLOW_ID, MIN_USER_TABLE_ID};
 use common_config::{metadata_store_dir, Configurable, KvBackendConfig};
 use common_error::ext::BoxedError;
 use common_meta::cache::LayeredCacheRegistryBuilder;
 use common_meta::cache_invalidator::CacheInvalidatorRef;
+use common_meta::cluster::{NodeInfo, NodeStatus};
+use common_meta::datanode::RegionStat;
 use common_meta::ddl::flow_meta::{FlowMetadataAllocator, FlowMetadataAllocatorRef};
 use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef};
 use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl, ProcedureExecutorRef};
@@ -33,10 +37,11 @@ use common_meta::key::flow::{FlowMetadataManager, FlowMetadataManagerRef};
 use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
 use common_meta::kv_backend::KvBackendRef;
 use common_meta::node_manager::NodeManagerRef;
+use common_meta::peer::Peer;
 use common_meta::region_keeper::MemoryRegionKeeper;
 use common_meta::sequence::SequenceBuilder;
 use common_meta::wal_options_allocator::{WalOptionsAllocator, WalOptionsAllocatorRef};
-use common_procedure::ProcedureManagerRef;
+use common_procedure::{ProcedureInfo, ProcedureManagerRef};
 use common_telemetry::info;
 use common_telemetry::logging::{LoggingOptions, TracingOptions};
 use common_time::timezone::set_default_timezone;
@@ -44,6 +49,7 @@ use common_version::{short_version, version};
 use common_wal::config::DatanodeWalConfig;
 use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, StorageConfig};
 use datanode::datanode::{Datanode, DatanodeBuilder};
+use datanode::region_server::RegionServer;
 use file_engine::config::EngineConfig as FileEngineConfig;
 use flow::{FlowWorkerManager, FlownodeBuilder, FrontendInvoker};
 use frontend::frontend::FrontendOptions;
@@ -55,6 +61,7 @@ use frontend::service_config::{
 };
 use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
 use mito2::config::MitoConfig;
+use query::stats::StatementStatistics;
 use serde::{Deserialize, Serialize};
 use servers::export_metrics::ExportMetricsOption;
 use servers::grpc::GrpcOptions;
@@ -477,22 +484,26 @@ impl StartCommand {
            .build(),
        );

-        let catalog_manager = KvBackendCatalogManager::new(
-            dn_opts.mode,
-            None,
-            kv_backend.clone(),
-            layered_cache_registry.clone(),
-        );
-
-        let table_metadata_manager =
-            Self::create_table_metadata_manager(kv_backend.clone()).await?;
-
        let datanode = DatanodeBuilder::new(dn_opts, plugins.clone())
            .with_kv_backend(kv_backend.clone())
            .build()
            .await
            .context(StartDatanodeSnafu)?;

+        let information_extension = Arc::new(StandaloneInformationExtension::new(
+            datanode.region_server(),
+            procedure_manager.clone(),
+        ));
+        let catalog_manager = KvBackendCatalogManager::new(
+            information_extension,
+            kv_backend.clone(),
+            layered_cache_registry.clone(),
+            Some(procedure_manager.clone()),
+        );
+
+        let table_metadata_manager =
+            Self::create_table_metadata_manager(kv_backend.clone()).await?;
+
        let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
        let flow_builder = FlownodeBuilder::new(
            Default::default(),
@@ -556,6 +567,7 @@ impl StartCommand {
            catalog_manager.clone(),
            node_manager.clone(),
            ddl_task_executor.clone(),
+            StatementStatistics::new(opts.logging.slow_query.clone()),
        )
        .with_plugin(plugins.clone())
        .try_build()
@@ -641,6 +653,91 @@ impl StartCommand {
    }
 }

+/// Information source for standalone mode, built from the local region server and
+/// procedure manager.
+pub struct StandaloneInformationExtension {
+    /// Region server queried for reportable regions and their statistics.
+    region_server: RegionServer,
+    /// Procedure manager queried for the procedure listing.
+    procedure_manager: ProcedureManagerRef,
+    /// Timestamp in milliseconds captured when this value was constructed.
+    start_time_ms: u64,
+}
+
+impl StandaloneInformationExtension {
+    /// Creates the extension and records the current time as `start_time_ms`.
+    pub fn new(region_server: RegionServer, procedure_manager: ProcedureManagerRef) -> Self {
+        let start_time_ms = common_time::util::current_time_millis() as u64;
+        Self {
+            region_server,
+            procedure_manager,
+            start_time_ms,
+        }
+    }
+}
+
+/// [`InformationExtension`] answered from the in-process region server and procedure
+/// manager instead of a remote meta client.
+#[async_trait::async_trait]
+impl InformationExtension for StandaloneInformationExtension {
+    type Error = catalog::error::Error;
+
+    /// Reports exactly one node describing this standalone process.
+    async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
+        let build = common_version::build_info();
+        // Standalone has no peers: the id is fixed to 0 and the address is empty.
+        let peer = Peer {
+            id: 0,
+            addr: "".to_string(),
+        };
+        Ok(vec![NodeInfo {
+            peer,
+            last_activity_ts: -1,
+            status: NodeStatus::Standalone,
+            version: build.version.to_string(),
+            git_commit: build.commit_short.to_string(),
+            // The construction time of this extension stands in for the start time;
+            // not precise, but close enough.
+            start_time_ms: self.start_time_ms,
+        }])
+    }
+
+    /// Lists procedures from the procedure manager, pairing each with its state name.
+    async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
+        let listed = self
+            .procedure_manager
+            .list_procedures()
+            .await
+            .map_err(BoxedError::new)
+            .context(catalog::error::ListProceduresSnafu)?;
+
+        Ok(listed
+            .into_iter()
+            .map(|info| (info.state.as_str_name().to_string(), info))
+            .collect::<Vec<_>>())
+    }
+
+    /// Builds one [`RegionStat`] per reportable region of the region server.
+    ///
+    /// `rcus` and `wcus` are always reported as 0. A region for which
+    /// `region_statistic` yields nothing falls back to the default statistic.
+    async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
+        let mut stats = Vec::new();
+        for region in self.region_server.reportable_regions() {
+            let statistic = self
+                .region_server
+                .region_statistic(region.region_id)
+                .unwrap_or_default();
+            stats.push(RegionStat {
+                id: region.region_id,
+                rcus: 0,
+                wcus: 0,
+                approximate_bytes: statistic.estimated_disk_size() as i64,
+                engine: region.engine,
+                role: RegionRole::from(region.role).into(),
+                memtable_size: statistic.memtable_size,
+                manifest_size: statistic.manifest_size,
+                sst_size: statistic.sst_size,
+            });
+        }
+        Ok(stats)
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use std::default::Default;
--- a/src/common/base/Cargo.toml
+++ b/src/common/base/Cargo.toml
@@ -8,7 +8,7 @@ license.workspace = true
 workspace = true

 [dependencies]
-anymap = "1.0.0-beta.2"
+anymap2 = "0.13"
 async-trait.workspace = true
 bitvec = "1.0"
 bytes.workspace = true
--- a/src/common/base/src/plugins.rs
+++ b/src/common/base/src/plugins.rs
@@ -12,20 +12,21 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::any::Any;
 use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};

-/// [`Plugins`] is a wrapper of [AnyMap](https://github.com/chris-morgan/anymap) and provides a thread-safe way to store and retrieve plugins.
+use anymap2::SendSyncAnyMap;
+
+/// [`Plugins`] is a wrapper of [anymap2](https://github.com/azriel91/anymap2) and provides a thread-safe way to store and retrieve plugins.
 /// Make it Cloneable and we can treat it like an Arc struct.
 #[derive(Default, Clone)]
 pub struct Plugins {
-    inner: Arc<RwLock<anymap::Map<dyn Any + Send + Sync>>>,
+    inner: Arc<RwLock<SendSyncAnyMap>>,
 }

 impl Plugins {
    pub fn new() -> Self {
        Self {
-            inner: Arc::new(RwLock::new(anymap::Map::new())),
+            inner: Arc::new(RwLock::new(SendSyncAnyMap::new())),
        }
    }

@@ -37,6 +38,18 @@ impl Plugins {
        self.read().get::<T>().cloned()
    }

+    /// Returns a clone of the stored plugin of type `T`, inserting `f()` first when no
+    /// `T` is stored yet.
+    ///
+    /// The write lock is held for the whole call, so the lookup and the insertion
+    /// happen under the same guard. `f` is invoked only when the map holds no `T`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the inner lock is poisoned, because `write()` unwraps the lock result.
+    pub fn get_or_insert<T, F>(&self, f: F) -> T
+    where
+        T: 'static + Send + Sync + Clone,
+        F: FnOnce() -> T,
+    {
+        let mut plugins = self.write();
+        // The entry API does a single lookup and removes the need to `unwrap` a
+        // value that was inserted just above.
+        plugins.entry::<T>().or_insert_with(f).clone()
+    }
+
    pub fn map_mut<T: 'static + Send + Sync, F, R>(&self, mapper: F) -> R
    where
        F: FnOnce(Option<&mut T>) -> R,
@@ -61,11 +74,11 @@ impl Plugins {
        self.read().is_empty()
    }

-    fn read(&self) -> RwLockReadGuard<anymap::Map<dyn Any + Send + Sync>> {
+    fn read(&self) -> RwLockReadGuard<SendSyncAnyMap> {
        self.inner.read().unwrap()
    }

-    fn write(&self) -> RwLockWriteGuard<anymap::Map<dyn Any + Send + Sync>> {
+    fn write(&self) -> RwLockWriteGuard<SendSyncAnyMap> {
        self.inner.write().unwrap()
    }
 }
--- a/src/common/catalog/src/consts.rs
+++ b/src/common/catalog/src/consts.rs
@@ -98,6 +98,11 @@ pub const INFORMATION_SCHEMA_CLUSTER_INFO_TABLE_ID: u32 = 31;
 pub const INFORMATION_SCHEMA_VIEW_TABLE_ID: u32 = 32;
 /// id for information_schema.FLOWS
 pub const INFORMATION_SCHEMA_FLOW_TABLE_ID: u32 = 33;
+/// id for information_schema.procedure_info
+pub const INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID: u32 = 34;
+/// id for information_schema.region_statistics
+pub const INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID: u32 = 35;
+
 /// ----- End of information_schema tables -----

 /// ----- Begin of pg_catalog tables -----
--- a/src/common/error/src/status_code.rs
+++ b/src/common/error/src/status_code.rs
@@ -38,6 +38,8 @@ pub enum StatusCode {
    Cancelled = 1005,
    /// Illegal state, can be exposed to users.
    IllegalState = 1006,
+    /// Caused by an error that originated in an external system.
+    External = 1007,
    // ====== End of common status code ================

    // ====== Begin of SQL related status code =========
@@ -162,7 +164,8 @@ impl StatusCode {
            | StatusCode::InvalidAuthHeader
            | StatusCode::AccessDenied
            | StatusCode::PermissionDenied
-            | StatusCode::RequestOutdated => false,
+            | StatusCode::RequestOutdated
+            | StatusCode::External => false,
        }
    }

@@ -177,7 +180,9 @@ impl StatusCode {
            | StatusCode::IllegalState
            | StatusCode::EngineExecuteQuery
            | StatusCode::StorageUnavailable
-            | StatusCode::RuntimeResourcesExhausted => true,
+            | StatusCode::RuntimeResourcesExhausted
+            | StatusCode::External => true,
+
            StatusCode::Success
            | StatusCode::Unsupported
            | StatusCode::InvalidArguments
@@ -256,7 +261,7 @@ macro_rules! define_into_tonic_status {
 pub fn status_to_tonic_code(status_code: StatusCode) -> Code {
    match status_code {
        StatusCode::Success => Code::Ok,
-        StatusCode::Unknown => Code::Unknown,
+        StatusCode::Unknown | StatusCode::External => Code::Unknown,
        StatusCode::Unsupported => Code::Unimplemented,
        StatusCode::Unexpected
        | StatusCode::IllegalState
--- a/src/common/function/Cargo.toml
+++ b/src/common/function/Cargo.toml
@@ -9,7 +9,7 @@ workspace = true

 [features]
 default = ["geo"]
-geo = ["geohash", "h3o"]
+geo = ["geohash", "h3o", "s2"]

 [dependencies]
 api.workspace = true
@@ -27,6 +27,7 @@ common-time.workspace = true
 common-version.workspace = true
 datafusion.workspace = true
 datatypes.workspace = true
+derive_more = { version = "1", default-features = false, features = ["display"] }
 geohash = { version = "0.13", optional = true }
 h3o = { version = "0.6", optional = true }
 jsonb.workspace = true
@@ -34,6 +35,7 @@ num = "0.4"
 num-traits = "0.2"
 once_cell.workspace = true
 paste = "1.0"
+s2 = { version = "0.0.12", optional = true }
 serde.workspace = true
 serde_json.workspace = true
 session.workspace = true
--- a/src/common/function/src/scalars/aggregate.rs
+++ b/src/common/function/src/scalars/aggregate.rs
@@ -16,7 +16,6 @@ mod argmax;
 mod argmin;
 mod diff;
 mod mean;
-mod percentile;
 mod polyval;
 mod scipy_stats_norm_cdf;
 mod scipy_stats_norm_pdf;
@@ -28,7 +27,6 @@ pub use argmin::ArgminAccumulatorCreator;
 use common_query::logical_plan::AggregateFunctionCreatorRef;
 pub use diff::DiffAccumulatorCreator;
 pub use mean::MeanAccumulatorCreator;
-pub use percentile::PercentileAccumulatorCreator;
 pub use polyval::PolyvalAccumulatorCreator;
 pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
 pub use scipy_stats_norm_pdf::ScipyStatsNormPdfAccumulatorCreator;
@@ -91,8 +89,14 @@ impl AggregateFunctions {
        register_aggr_func!("polyval", 2, PolyvalAccumulatorCreator);
        register_aggr_func!("argmax", 1, ArgmaxAccumulatorCreator);
        register_aggr_func!("argmin", 1, ArgminAccumulatorCreator);
-        register_aggr_func!("percentile", 2, PercentileAccumulatorCreator);
        register_aggr_func!("scipystatsnormcdf", 2, ScipyStatsNormCdfAccumulatorCreator);
        register_aggr_func!("scipystatsnormpdf", 2, ScipyStatsNormPdfAccumulatorCreator);
+
+        #[cfg(feature = "geo")]
+        register_aggr_func!(
+            "json_encode_path",
+            3,
+            super::geo::encoding::JsonPathEncodeFunctionCreator
+        );
    }
 }
--- a/src/common/function/src/scalars/aggregate/argmax.rs
+++ b/src/common/function/src/scalars/aggregate/argmax.rs
@@ -16,7 +16,10 @@ use std::cmp::Ordering;
 use std::sync::Arc;

 use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
-use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Result};
+use common_query::error::{
+    BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
+};
+use common_query::logical_plan::accumulator::AggrFuncTypeStore;
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
--- a/src/common/function/src/scalars/aggregate/argmin.rs
+++ b/src/common/function/src/scalars/aggregate/argmin.rs
@@ -16,7 +16,10 @@ use std::cmp::Ordering;
 use std::sync::Arc;

 use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
-use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Result};
+use common_query::error::{
+    BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
+};
+use common_query::logical_plan::accumulator::AggrFuncTypeStore;
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
--- a/src/common/function/src/scalars/aggregate/diff.rs
+++ b/src/common/function/src/scalars/aggregate/diff.rs
@@ -17,8 +17,10 @@ use std::sync::Arc;

 use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
 use common_query::error::{
-    CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, Result,
+    CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, InvalidInputStateSnafu,
+    Result,
 };
+use common_query::logical_plan::accumulator::AggrFuncTypeStore;
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
--- a/src/common/function/src/scalars/aggregate/mean.rs
+++ b/src/common/function/src/scalars/aggregate/mean.rs
@@ -17,8 +17,10 @@ use std::sync::Arc;

 use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
 use common_query::error::{
-    BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu, Result,
+    BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu, InvalidInputStateSnafu,
+    Result,
 };
+use common_query::logical_plan::accumulator::AggrFuncTypeStore;
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
--- a/src/common/function/src/scalars/aggregate/percentile.rs
+++ b/src/common/function/src/scalars/aggregate/percentile.rs
@@ -1,436 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::cmp::Reverse;
-use std::collections::BinaryHeap;
-use std::sync::Arc;
-
-use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
-use common_query::error::{
-    self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
-    FromScalarValueSnafu, InvalidInputColSnafu, Result,
-};
-use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
-use common_query::prelude::*;
-use datatypes::prelude::*;
-use datatypes::types::OrdPrimitive;
-use datatypes::value::{ListValue, OrderedFloat};
-use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
-use datatypes::with_match_primitive_type_id;
-use num::NumCast;
-use snafu::{ensure, OptionExt, ResultExt};
-
-// https://numpy.org/doc/stable/reference/generated/numpy.percentile.html?highlight=percentile#numpy.percentile
-// if the p is 50,then the Percentile become median
-// we use two heap great and not_greater
-// the not_greater push the value that smaller than P-value
-// the greater push the value that bigger than P-value
-// just like the percentile in numpy:
-// Given a vector V of length N, the q-th percentile of V is the value q/100 of the way from the minimum to the maximum in a sorted copy of V.
-// The values and distances of the two nearest neighbors as well as the method parameter will determine the percentile
-// if the normalized ranking does not match the location of q exactly.
-// This function is the same as the median if q=50, the same as the minimum if q=0 and the same as the maximum if q=100.
-// This optional method parameter specifies the method to use when the desired quantile lies between two data points i < j.
-// If g is the fractional part of the index surrounded by i and alpha and beta are correction constants modifying i and j.
-//              i+g = (q-alpha)/(n-alpha-beta+1)
-// Below, 'q' is the quantile value, 'n' is the sample size and alpha and beta are constants. The following formula gives an interpolation "i + g" of where the quantile would be in the sorted sample.
-// With 'i' being the floor and 'g' the fractional part of the result.
-// the default method is linear where
-// alpha = 1
-// beta = 1
-#[derive(Debug, Default)]
-pub struct Percentile<T>
-where
-    T: WrapperType,
-{
-    greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
-    not_greater: BinaryHeap<OrdPrimitive<T>>,
-    n: u64,
-    p: Option<f64>,
-}
-
-impl<T> Percentile<T>
-where
-    T: WrapperType,
-{
-    fn push(&mut self, value: T) {
-        let value = OrdPrimitive::<T>(value);
-
-        self.n += 1;
-        if self.not_greater.is_empty() {
-            self.not_greater.push(value);
-            return;
-        }
-        // to keep the not_greater length == floor+1
-        // so to ensure the peek of the not_greater is array[floor]
-        // and the peek of the greater is array[floor+1]
-        let p = self.p.unwrap_or(0.0_f64);
-        let floor = (((self.n - 1) as f64) * p / (100_f64)).floor();
-        if value <= *self.not_greater.peek().unwrap() {
-            self.not_greater.push(value);
-            if self.not_greater.len() > (floor + 1.0) as usize {
-                self.greater.push(Reverse(self.not_greater.pop().unwrap()));
-            }
-        } else {
-            self.greater.push(Reverse(value));
-            if self.not_greater.len() < (floor + 1.0) as usize {
-                self.not_greater.push(self.greater.pop().unwrap().0);
-            }
-        }
-    }
-}
-
-impl<T> Accumulator for Percentile<T>
-where
-    T: WrapperType,
-{
-    fn state(&self) -> Result<Vec<Value>> {
-        let nums = self
-            .greater
-            .iter()
-            .map(|x| &x.0)
-            .chain(self.not_greater.iter())
-            .map(|&n| n.into())
-            .collect::<Vec<Value>>();
-        Ok(vec![
-            Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
-            self.p.into(),
-        ])
-    }
-
-    fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
-        if values.is_empty() {
-            return Ok(());
-        }
-        ensure!(values.len() == 2, InvalidInputStateSnafu);
-        ensure!(values[0].len() == values[1].len(), InvalidInputStateSnafu);
-
-        if values[0].len() == 0 {
-            return Ok(());
-        }
-
-        // This is a unary accumulator, so only one column is provided.
-        let column = &values[0];
-        let mut len = 1;
-        let column: &<T as Scalar>::VectorType = if column.is_const() {
-            len = column.len();
-            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
-            unsafe { Helper::static_cast(column.inner()) }
-        } else {
-            unsafe { Helper::static_cast(column) }
-        };
-
-        let x = &values[1];
-        let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
-            err_msg: "expecting \"POLYVAL\" function's second argument to be float64",
-        })?;
-        // `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
-        let first = x.get(0);
-        ensure!(!first.is_null(), InvalidInputColSnafu);
-
-        for i in 1..x.len() {
-            ensure!(first == x.get(i), InvalidInputColSnafu);
-        }
-
-        let first = match first {
-            Value::Float64(OrderedFloat(v)) => v,
-            // unreachable because we have checked `first` is not null and is i64 above
-            _ => unreachable!(),
-        };
-        if let Some(p) = self.p {
-            ensure!(p == first, InvalidInputColSnafu);
-        } else {
-            self.p = Some(first);
-        };
-
-        (0..len).for_each(|_| {
-            for v in column.iter_data().flatten() {
-                self.push(v);
-            }
-        });
-        Ok(())
-    }
-
-    fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
-        if states.is_empty() {
-            return Ok(());
-        }
-
-        ensure!(
-            states.len() == 2,
-            BadAccumulatorImplSnafu {
-                err_msg: "expect 2 states in `merge_batch`"
-            }
-        );
-
-        let p = &states[1];
-        let p = p
-            .as_any()
-            .downcast_ref::<Float64Vector>()
-            .with_context(|| DowncastVectorSnafu {
-                err_msg: format!(
-                    "expect float64vector, got vector type {}",
-                    p.vector_type_name()
-                ),
-            })?;
-        let p = p.get(0);
-        if p.is_null() {
-            return Ok(());
-        }
-        let p = match p {
-            Value::Float64(OrderedFloat(p)) => p,
-            _ => unreachable!(),
-        };
-        self.p = Some(p);
-
-        let values = &states[0];
-        let values = values
-            .as_any()
-            .downcast_ref::<ListVector>()
-            .with_context(|| DowncastVectorSnafu {
-                err_msg: format!(
-                    "expect ListVector, got vector type {}",
-                    values.vector_type_name()
-                ),
-            })?;
-        for value in values.values_iter() {
-            if let Some(value) = value.context(FromScalarValueSnafu)? {
-                let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
-                for v in column.iter_data().flatten() {
-                    self.push(v);
-                }
-            }
-        }
-        Ok(())
-    }
-
-    fn evaluate(&self) -> Result<Value> {
-        if self.not_greater.is_empty() {
-            assert!(
-                self.greater.is_empty(),
-                "not expected in two-heap percentile algorithm, there must be a bug when implementing it"
-            );
-        }
-        let not_greater = self.not_greater.peek();
-        if not_greater.is_none() {
-            return Ok(Value::Null);
-        }
-        let not_greater = (*self.not_greater.peek().unwrap()).as_primitive();
-        let percentile = if self.greater.is_empty() {
-            NumCast::from(not_greater).unwrap()
-        } else {
-            let greater = self.greater.peek().unwrap();
-            let p = if let Some(p) = self.p {
-                p
-            } else {
-                return Ok(Value::Null);
-            };
-            let fract = (((self.n - 1) as f64) * p / 100_f64).fract();
-            let not_greater_v: f64 = NumCast::from(not_greater).unwrap();
-            let greater_v: f64 = NumCast::from(greater.0.as_primitive()).unwrap();
-            not_greater_v * (1.0 - fract) + greater_v * fract
-        };
-        Ok(Value::from(percentile))
-    }
-}
-
-#[as_aggr_func_creator]
-#[derive(Debug, Default, AggrFuncTypeStore)]
-pub struct PercentileAccumulatorCreator {}
-
-impl AggregateFunctionCreator for PercentileAccumulatorCreator {
-    fn creator(&self) -> AccumulatorCreatorFunction {
-        let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
-            let input_type = &types[0];
-            with_match_primitive_type_id!(
-                input_type.logical_type_id(),
-                |$S| {
-                    Ok(Box::new(Percentile::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
-                },
-                {
-                    let err_msg = format!(
-                        "\"PERCENTILE\" aggregate function not support data type {:?}",
-                        input_type.logical_type_id(),
-                    );
-                    CreateAccumulatorSnafu { err_msg }.fail()?
-                }
-            )
-        });
-        creator
-    }
-
-    fn output_type(&self) -> Result<ConcreteDataType> {
-        let input_types = self.input_types()?;
-        ensure!(input_types.len() == 2, InvalidInputStateSnafu);
-        // unwrap is safe because we have checked input_types len must equals 1
-        Ok(ConcreteDataType::float64_datatype())
-    }
-
-    fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
-        let input_types = self.input_types()?;
-        ensure!(input_types.len() == 2, InvalidInputStateSnafu);
-        Ok(vec![
-            ConcreteDataType::list_datatype(input_types.into_iter().next().unwrap()),
-            ConcreteDataType::float64_datatype(),
-        ])
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use datatypes::vectors::{Float64Vector, Int32Vector};
-
-    use super::*;
-    #[test]
-    fn test_update_batch() {
-        // test update empty batch, expect not updating anything
-        let mut percentile = Percentile::<i32>::default();
-        percentile.update_batch(&[]).unwrap();
-        assert!(percentile.not_greater.is_empty());
-        assert!(percentile.greater.is_empty());
-        assert_eq!(Value::Null, percentile.evaluate().unwrap());
-
-        // test update one not-null value
-        let mut percentile = Percentile::<i32>::default();
-        let v: Vec<VectorRef> = vec![
-            Arc::new(Int32Vector::from(vec![Some(42)])),
-            Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
-        ];
-        percentile.update_batch(&v).unwrap();
-        assert_eq!(Value::from(42.0_f64), percentile.evaluate().unwrap());
-
-        // test update one null value
-        let mut percentile = Percentile::<i32>::default();
-        let v: Vec<VectorRef> = vec![
-            Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
-            Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
-        ];
-        percentile.update_batch(&v).unwrap();
-        assert_eq!(Value::Null, percentile.evaluate().unwrap());
-
-        // test update no null-value batch
-        let mut percentile = Percentile::<i32>::default();
-        let v: Vec<VectorRef> = vec![
-            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
-            Arc::new(Float64Vector::from(vec![
-                Some(100.0_f64),
-                Some(100.0_f64),
-                Some(100.0_f64),
-            ])),
-        ];
-        percentile.update_batch(&v).unwrap();
-        assert_eq!(Value::from(2_f64), percentile.evaluate().unwrap());
-
-        // test update null-value batch
-        let mut percentile = Percentile::<i32>::default();
-        let v: Vec<VectorRef> = vec![
-            Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
-            Arc::new(Float64Vector::from(vec![
-                Some(100.0_f64),
-                Some(100.0_f64),
-                Some(100.0_f64),
-                Some(100.0_f64),
-            ])),
-        ];
-        percentile.update_batch(&v).unwrap();
-        assert_eq!(Value::from(4_f64), percentile.evaluate().unwrap());
-
-        // test update with constant vector
-        let mut percentile = Percentile::<i32>::default();
-        let v: Vec<VectorRef> = vec![
-            Arc::new(ConstantVector::new(
-                Arc::new(Int32Vector::from_vec(vec![4])),
-                2,
-            )),
-            Arc::new(Float64Vector::from(vec![Some(100.0_f64), Some(100.0_f64)])),
-        ];
-        percentile.update_batch(&v).unwrap();
-        assert_eq!(Value::from(4_f64), percentile.evaluate().unwrap());
-
-        // test left border
-        let mut percentile = Percentile::<i32>::default();
-        let v: Vec<VectorRef> = vec![
-            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
-            Arc::new(Float64Vector::from(vec![
-                Some(0.0_f64),
-                Some(0.0_f64),
-                Some(0.0_f64),
-            ])),
-        ];
-        percentile.update_batch(&v).unwrap();
-        assert_eq!(Value::from(-1.0_f64), percentile.evaluate().unwrap());
-
-        // test medium
-        let mut percentile = Percentile::<i32>::default();
-        let v: Vec<VectorRef> = vec![
-            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
-            Arc::new(Float64Vector::from(vec![
-                Some(50.0_f64),
-                Some(50.0_f64),
-                Some(50.0_f64),
-            ])),
-        ];
-        percentile.update_batch(&v).unwrap();
-        assert_eq!(Value::from(1.0_f64), percentile.evaluate().unwrap());
-
-        // test right border
-        let mut percentile = Percentile::<i32>::default();
-        let v: Vec<VectorRef> = vec![
-            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
-            Arc::new(Float64Vector::from(vec![
-                Some(100.0_f64),
-                Some(100.0_f64),
-                Some(100.0_f64),
-            ])),
-        ];
-        percentile.update_batch(&v).unwrap();
-        assert_eq!(Value::from(2.0_f64), percentile.evaluate().unwrap());
-
-        // the following is the result of numpy.percentile
-        // numpy.percentile
-        // a = np.array([[10,7,4]])
-        // np.percentile(a,40)
-        // >> 6.400000000000
-        let mut percentile = Percentile::<i32>::default();
-        let v: Vec<VectorRef> = vec![
-            Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
-            Arc::new(Float64Vector::from(vec![
-                Some(40.0_f64),
-                Some(40.0_f64),
-                Some(40.0_f64),
-            ])),
-        ];
-        percentile.update_batch(&v).unwrap();
-        assert_eq!(Value::from(6.400000000_f64), percentile.evaluate().unwrap());
-
-        // the following is the result of numpy.percentile
-        // a = np.array([[10,7,4]])
-        // np.percentile(a,95)
-        // >> 9.7000000000000011
-        let mut percentile = Percentile::<i32>::default();
-        let v: Vec<VectorRef> = vec![
-            Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
-            Arc::new(Float64Vector::from(vec![
-                Some(95.0_f64),
-                Some(95.0_f64),
-                Some(95.0_f64),
-            ])),
-        ];
-        percentile.update_batch(&v).unwrap();
-        assert_eq!(
-            Value::from(9.700_000_000_000_001_f64),
-            percentile.evaluate().unwrap()
-        );
-    }
-}
--- a/src/common/function/src/scalars/aggregate/polyval.rs
+++ b/src/common/function/src/scalars/aggregate/polyval.rs
@@ -18,8 +18,9 @@ use std::sync::Arc;
 use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
 use common_query::error::{
    self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
-    FromScalarValueSnafu, InvalidInputColSnafu, Result,
+    FromScalarValueSnafu, InvalidInputColSnafu, InvalidInputStateSnafu, Result,
 };
+use common_query::logical_plan::accumulator::AggrFuncTypeStore;
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
--- a/src/common/function/src/scalars/aggregate/scipy_stats_norm_cdf.rs
+++ b/src/common/function/src/scalars/aggregate/scipy_stats_norm_cdf.rs
@@ -17,8 +17,10 @@ use std::sync::Arc;
 use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
 use common_query::error::{
    self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
-    FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, Result,
+    FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
+    Result,
 };
+use common_query::logical_plan::accumulator::AggrFuncTypeStore;
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
--- a/src/common/function/src/scalars/aggregate/scipy_stats_norm_pdf.rs
+++ b/src/common/function/src/scalars/aggregate/scipy_stats_norm_pdf.rs
@@ -17,8 +17,10 @@ use std::sync::Arc;
 use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
 use common_query::error::{
    self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
-    FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, Result,
+    FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
+    Result,
 };
+use common_query::logical_plan::accumulator::AggrFuncTypeStore;
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
--- a/src/common/function/src/scalars/date/date_add.rs
+++ b/src/common/function/src/scalars/date/date_add.rs
@@ -14,18 +14,19 @@

 use std::fmt;

-use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
+use common_query::error::{ArrowComputeSnafu, IntoVectorSnafu, InvalidFuncArgsSnafu, Result};
 use common_query::prelude::Signature;
-use datatypes::data_type::DataType;
+use datatypes::arrow::compute::kernels::numeric;
 use datatypes::prelude::ConcreteDataType;
-use datatypes::value::ValueRef;
-use datatypes::vectors::VectorRef;
-use snafu::ensure;
+use datatypes::vectors::{Helper, VectorRef};
+use snafu::{ensure, ResultExt};

 use crate::function::{Function, FunctionContext};
 use crate::helper;

-/// A function adds an interval value to Timestamp, Date or DateTime, and return the result.
+/// A function that adds an interval value to a Timestamp or Date and returns the result.
+/// The implementation of datetime type is based on Date64 which is incorrect so this function
+/// doesn't support the datetime type.
 #[derive(Clone, Debug, Default)]
 pub struct DateAddFunction;

@@ -44,7 +45,6 @@ impl Function for DateAddFunction {
        helper::one_of_sigs2(
            vec![
                ConcreteDataType::date_datatype(),
-                ConcreteDataType::datetime_datatype(),
                ConcreteDataType::timestamp_second_datatype(),
                ConcreteDataType::timestamp_millisecond_datatype(),
                ConcreteDataType::timestamp_microsecond_datatype(),
@@ -69,64 +69,14 @@ impl Function for DateAddFunction {
            }
        );

-        let left = &columns[0];
-        let right = &columns[1];
+        let left = columns[0].to_arrow_array();
+        let right = columns[1].to_arrow_array();

-        let size = left.len();
-        let left_datatype = columns[0].data_type();
-        match left_datatype {
-            ConcreteDataType::Timestamp(_) => {
-                let mut result = left_datatype.create_mutable_vector(size);
-                for i in 0..size {
-                    let ts = left.get(i).as_timestamp();
-                    let interval = right.get(i).as_interval();
-
-                    let new_ts = match (ts, interval) {
-                        (Some(ts), Some(interval)) => ts.add_interval(interval),
-                        _ => ts,
-                    };
-
-                    result.push_value_ref(ValueRef::from(new_ts));
-                }
-
-                Ok(result.to_vector())
-            }
-            ConcreteDataType::Date(_) => {
-                let mut result = left_datatype.create_mutable_vector(size);
-                for i in 0..size {
-                    let date = left.get(i).as_date();
-                    let interval = right.get(i).as_interval();
-                    let new_date = match (date, interval) {
-                        (Some(date), Some(interval)) => date.add_interval(interval),
-                        _ => date,
-                    };
-
-                    result.push_value_ref(ValueRef::from(new_date));
-                }
-
-                Ok(result.to_vector())
-            }
-            ConcreteDataType::DateTime(_) => {
-                let mut result = left_datatype.create_mutable_vector(size);
-                for i in 0..size {
-                    let datetime = left.get(i).as_datetime();
-                    let interval = right.get(i).as_interval();
-                    let new_datetime = match (datetime, interval) {
-                        (Some(datetime), Some(interval)) => datetime.add_interval(interval),
-                        _ => datetime,
-                    };
-
-                    result.push_value_ref(ValueRef::from(new_datetime));
-                }
-
-                Ok(result.to_vector())
-            }
-            _ => UnsupportedInputDataTypeSnafu {
-                function: NAME,
-                datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
-            }
-            .fail(),
-        }
+        let result = numeric::add(&left, &right).context(ArrowComputeSnafu)?;
+        let arrow_type = result.data_type().clone();
+        Helper::try_into_vector(result).context(IntoVectorSnafu {
+            data_type: arrow_type,
+        })
    }
 }

@@ -144,8 +94,7 @@ mod tests {
    use datatypes::prelude::ConcreteDataType;
    use datatypes::value::Value;
    use datatypes::vectors::{
-        DateTimeVector, DateVector, IntervalDayTimeVector, IntervalYearMonthVector,
-        TimestampSecondVector,
+        DateVector, IntervalDayTimeVector, IntervalYearMonthVector, TimestampSecondVector,
    };

    use super::{DateAddFunction, *};
@@ -168,16 +117,15 @@ mod tests {
            ConcreteDataType::date_datatype(),
            f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
        );
-        assert_eq!(
-            ConcreteDataType::datetime_datatype(),
-            f.return_type(&[ConcreteDataType::datetime_datatype()])
-                .unwrap()
-        );
-        assert!(matches!(f.signature(),
+        assert!(
+            matches!(f.signature(),
                         Signature {
                             type_signature: TypeSignature::OneOf(sigs),
                             volatility: Volatility::Immutable
-                         } if  sigs.len() == 18));
+                         } if  sigs.len() == 15),
+            "{:?}",
+            f.signature()
+        );
    }

    #[test]
@@ -243,36 +191,4 @@ mod tests {
            }
        }
    }
-
-    #[test]
-    fn test_datetime_date_add() {
-        let f = DateAddFunction;
-
-        let dates = vec![Some(123), None, Some(42), None];
-        // Intervals in months
-        let intervals = vec![1, 2, 3, 1];
-        let results = [Some(2678400123), None, Some(7776000042), None];
-
-        let date_vector = DateTimeVector::from(dates.clone());
-        let interval_vector = IntervalYearMonthVector::from_vec(intervals);
-        let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
-        let vector = f.eval(FunctionContext::default(), &args).unwrap();
-
-        assert_eq!(4, vector.len());
-        for (i, _t) in dates.iter().enumerate() {
-            let v = vector.get(i);
-            let result = results.get(i).unwrap();
-
-            if result.is_none() {
-                assert_eq!(Value::Null, v);
-                continue;
-            }
-            match v {
-                Value::DateTime(date) => {
-                    assert_eq!(date.val(), result.unwrap());
-                }
-                _ => unreachable!(),
-            }
-        }
-    }
 }
--- a/src/common/function/src/scalars/date/date_sub.rs
+++ b/src/common/function/src/scalars/date/date_sub.rs
@@ -14,18 +14,19 @@

 use std::fmt;

-use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
+use common_query::error::{ArrowComputeSnafu, IntoVectorSnafu, InvalidFuncArgsSnafu, Result};
 use common_query::prelude::Signature;
-use datatypes::data_type::DataType;
+use datatypes::arrow::compute::kernels::numeric;
 use datatypes::prelude::ConcreteDataType;
-use datatypes::value::ValueRef;
-use datatypes::vectors::VectorRef;
-use snafu::ensure;
+use datatypes::vectors::{Helper, VectorRef};
+use snafu::{ensure, ResultExt};

 use crate::function::{Function, FunctionContext};
 use crate::helper;

-/// A function subtracts an interval value to Timestamp, Date or DateTime, and return the result.
+/// A function that subtracts an interval value from a Timestamp or Date and returns the result.
+/// The implementation of datetime type is based on Date64 which is incorrect so this function
+/// doesn't support the datetime type.
 #[derive(Clone, Debug, Default)]
 pub struct DateSubFunction;

@@ -44,7 +45,6 @@ impl Function for DateSubFunction {
        helper::one_of_sigs2(
            vec![
                ConcreteDataType::date_datatype(),
-                ConcreteDataType::datetime_datatype(),
                ConcreteDataType::timestamp_second_datatype(),
                ConcreteDataType::timestamp_millisecond_datatype(),
                ConcreteDataType::timestamp_microsecond_datatype(),
@@ -69,65 +69,14 @@ impl Function for DateSubFunction {
            }
        );

-        let left = &columns[0];
-        let right = &columns[1];
+        let left = columns[0].to_arrow_array();
+        let right = columns[1].to_arrow_array();

-        let size = left.len();
-        let left_datatype = columns[0].data_type();
-
-        match left_datatype {
-            ConcreteDataType::Timestamp(_) => {
-                let mut result = left_datatype.create_mutable_vector(size);
-                for i in 0..size {
-                    let ts = left.get(i).as_timestamp();
-                    let interval = right.get(i).as_interval();
-
-                    let new_ts = match (ts, interval) {
-                        (Some(ts), Some(interval)) => ts.sub_interval(interval),
-                        _ => ts,
-                    };
-
-                    result.push_value_ref(ValueRef::from(new_ts));
-                }
-
-                Ok(result.to_vector())
-            }
-            ConcreteDataType::Date(_) => {
-                let mut result = left_datatype.create_mutable_vector(size);
-                for i in 0..size {
-                    let date = left.get(i).as_date();
-                    let interval = right.get(i).as_interval();
-                    let new_date = match (date, interval) {
-                        (Some(date), Some(interval)) => date.sub_interval(interval),
-                        _ => date,
-                    };
-
-                    result.push_value_ref(ValueRef::from(new_date));
-                }
-
-                Ok(result.to_vector())
-            }
-            ConcreteDataType::DateTime(_) => {
-                let mut result = left_datatype.create_mutable_vector(size);
-                for i in 0..size {
-                    let datetime = left.get(i).as_datetime();
-                    let interval = right.get(i).as_interval();
-                    let new_datetime = match (datetime, interval) {
-                        (Some(datetime), Some(interval)) => datetime.sub_interval(interval),
-                        _ => datetime,
-                    };
-
-                    result.push_value_ref(ValueRef::from(new_datetime));
-                }
-
-                Ok(result.to_vector())
-            }
-            _ => UnsupportedInputDataTypeSnafu {
-                function: NAME,
-                datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
-            }
-            .fail(),
-        }
+        let result = numeric::sub(&left, &right).context(ArrowComputeSnafu)?;
+        let arrow_type = result.data_type().clone();
+        Helper::try_into_vector(result).context(IntoVectorSnafu {
+            data_type: arrow_type,
+        })
    }
 }

@@ -145,8 +94,7 @@ mod tests {
    use datatypes::prelude::ConcreteDataType;
    use datatypes::value::Value;
    use datatypes::vectors::{
-        DateTimeVector, DateVector, IntervalDayTimeVector, IntervalYearMonthVector,
-        TimestampSecondVector,
+        DateVector, IntervalDayTimeVector, IntervalYearMonthVector, TimestampSecondVector,
    };

    use super::{DateSubFunction, *};
@@ -174,11 +122,15 @@ mod tests {
            f.return_type(&[ConcreteDataType::datetime_datatype()])
                .unwrap()
        );
-        assert!(matches!(f.signature(),
+        assert!(
+            matches!(f.signature(),
                         Signature {
                             type_signature: TypeSignature::OneOf(sigs),
                             volatility: Volatility::Immutable
-                         } if  sigs.len() == 18));
+                         } if  sigs.len() == 15),
+            "{:?}",
+            f.signature()
+        );
    }

    #[test]
@@ -250,42 +202,4 @@ mod tests {
            }
        }
    }
-
-    #[test]
-    fn test_datetime_date_sub() {
-        let f = DateSubFunction;
-        let millis_per_month = 3600 * 24 * 30 * 1000;
-
-        let dates = vec![
-            Some(123 * millis_per_month),
-            None,
-            Some(42 * millis_per_month),
-            None,
-        ];
-        // Intervals in months
-        let intervals = vec![1, 2, 3, 1];
-        let results = [Some(316137600000), None, Some(100915200000), None];
-
-        let date_vector = DateTimeVector::from(dates.clone());
-        let interval_vector = IntervalYearMonthVector::from_vec(intervals);
-        let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
-        let vector = f.eval(FunctionContext::default(), &args).unwrap();
-
-        assert_eq!(4, vector.len());
-        for (i, _t) in dates.iter().enumerate() {
-            let v = vector.get(i);
-            let result = results.get(i).unwrap();
-
-            if result.is_none() {
-                assert_eq!(Value::Null, v);
-                continue;
-            }
-            match v {
-                Value::DateTime(date) => {
-                    assert_eq!(date.val(), result.unwrap());
-                }
-                _ => unreachable!(),
-            }
-        }
-    }
 }
--- a/src/common/function/src/scalars/geo.rs
+++ b/src/common/function/src/scalars/geo.rs
@@ -13,11 +13,11 @@
 // limitations under the License.

 use std::sync::Arc;
+pub(crate) mod encoding;
 mod geohash;
 mod h3;
-
-use geohash::GeohashFunction;
-use h3::H3Function;
+mod helpers;
+mod s2;

 use crate::function_registry::FunctionRegistry;

@@ -25,7 +25,40 @@ pub(crate) struct GeoFunctions;

 impl GeoFunctions {
    pub fn register(registry: &FunctionRegistry) {
-        registry.register(Arc::new(GeohashFunction));
-        registry.register(Arc::new(H3Function));
+        // geohash
+        registry.register(Arc::new(geohash::GeohashFunction));
+        registry.register(Arc::new(geohash::GeohashNeighboursFunction));
+
+        // h3 index
+        registry.register(Arc::new(h3::H3LatLngToCell));
+        registry.register(Arc::new(h3::H3LatLngToCellString));
+
+        // h3 index inspection
+        registry.register(Arc::new(h3::H3CellBase));
+        registry.register(Arc::new(h3::H3CellIsPentagon));
+        registry.register(Arc::new(h3::H3StringToCell));
+        registry.register(Arc::new(h3::H3CellToString));
+        registry.register(Arc::new(h3::H3CellCenterLatLng));
+        registry.register(Arc::new(h3::H3CellResolution));
+
+        // h3 hierarchical grid
+        registry.register(Arc::new(h3::H3CellCenterChild));
+        registry.register(Arc::new(h3::H3CellParent));
+        registry.register(Arc::new(h3::H3CellToChildren));
+        registry.register(Arc::new(h3::H3CellToChildrenSize));
+        registry.register(Arc::new(h3::H3CellToChildPos));
+        registry.register(Arc::new(h3::H3ChildPosToCell));
+
+        // h3 grid traversal
+        registry.register(Arc::new(h3::H3GridDisk));
+        registry.register(Arc::new(h3::H3GridDiskDistances));
+        registry.register(Arc::new(h3::H3GridDistance));
+        registry.register(Arc::new(h3::H3GridPathCells));
+
+        // s2
+        registry.register(Arc::new(s2::S2LatLngToCell));
+        registry.register(Arc::new(s2::S2CellLevel));
+        registry.register(Arc::new(s2::S2CellToToken));
+        registry.register(Arc::new(s2::S2CellParent));
    }
 }
--- a/src/common/function/src/scalars/geo/encoding.rs
+++ b/src/common/function/src/scalars/geo/encoding.rs
@@ -0,0 +1,223 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use common_error::ext::{BoxedError, PlainError};
+use common_error::status_code::StatusCode;
+use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
+use common_query::error::{self, InvalidInputStateSnafu, Result};
+use common_query::logical_plan::accumulator::AggrFuncTypeStore;
+use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
+use common_query::prelude::AccumulatorCreatorFunction;
+use common_time::Timestamp;
+use datatypes::prelude::ConcreteDataType;
+use datatypes::value::{ListValue, Value};
+use datatypes::vectors::VectorRef;
+use snafu::{ensure, ResultExt};
+
+use super::helpers::{ensure_columns_len, ensure_columns_n};
+
+/// Accumulator of lat, lng, timestamp tuples
+#[derive(Debug)]
+pub struct JsonPathAccumulator {
+    timestamp_type: ConcreteDataType,
+    lat: Vec<Option<f64>>,
+    lng: Vec<Option<f64>>,
+    timestamp: Vec<Option<Timestamp>>,
+}
+
+impl JsonPathAccumulator {
+    fn new(timestamp_type: ConcreteDataType) -> Self {
+        Self {
+            lat: Vec::default(),
+            lng: Vec::default(),
+            timestamp: Vec::default(),
+            timestamp_type,
+        }
+    }
+}
+
+impl Accumulator for JsonPathAccumulator {
+    fn state(&self) -> Result<Vec<Value>> {
+        Ok(vec![
+            Value::List(ListValue::new(
+                self.lat.iter().map(|i| Value::from(*i)).collect(),
+                ConcreteDataType::float64_datatype(),
+            )),
+            Value::List(ListValue::new(
+                self.lng.iter().map(|i| Value::from(*i)).collect(),
+                ConcreteDataType::float64_datatype(),
+            )),
+            Value::List(ListValue::new(
+                self.timestamp.iter().map(|i| Value::from(*i)).collect(),
+                self.timestamp_type.clone(),
+            )),
+        ])
+    }
+
+    fn update_batch(&mut self, columns: &[VectorRef]) -> Result<()> {
+        // update batch as in datafusion just provides the accumulator original
+        //  input.
+        //
+        // columns is vec of [`lat`, `lng`, `timestamp`]
+        // where
+        // - `lat` is a vector of `Value::Float64` or similar type. Each item in
+        //  the vector is a row in given dataset.
+        // - so on so forth for `lng` and `timestamp`
+        ensure_columns_n!(columns, 3);
+
+        let lat = &columns[0];
+        let lng = &columns[1];
+        let ts = &columns[2];
+
+        let size = lat.len();
+
+        for idx in 0..size {
+            self.lat.push(lat.get(idx).as_f64_lossy());
+            self.lng.push(lng.get(idx).as_f64_lossy());
+            self.timestamp.push(ts.get(idx).as_timestamp());
+        }
+
+        Ok(())
+    }
+
+    fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
+        // merge batch as in datafusion gives state accumulated from the data
+        //  returned from child accumulators' state() call
+        // In our particular implementation, the data structure is like
+        //
+        // states is vec of [`lat`, `lng`, `timestamp`]
+        // where
+        // - `lat` is a vector of `Value::List`. Each item in the list is all
+        //  coordinates from a child accumulator.
+        // - so on so forth for `lng` and `timestamp`
+
+        ensure_columns_n!(states, 3);
+
+        let lat_lists = &states[0];
+        let lng_lists = &states[1];
+        let ts_lists = &states[2];
+
+        let len = lat_lists.len();
+
+        for idx in 0..len {
+            if let Some(lat_list) = lat_lists
+                .get(idx)
+                .as_list()
+                .map_err(BoxedError::new)
+                .context(error::ExecuteSnafu)?
+            {
+                for v in lat_list.items() {
+                    self.lat.push(v.as_f64_lossy());
+                }
+            }
+
+            if let Some(lng_list) = lng_lists
+                .get(idx)
+                .as_list()
+                .map_err(BoxedError::new)
+                .context(error::ExecuteSnafu)?
+            {
+                for v in lng_list.items() {
+                    self.lng.push(v.as_f64_lossy());
+                }
+            }
+
+            if let Some(ts_list) = ts_lists
+                .get(idx)
+                .as_list()
+                .map_err(BoxedError::new)
+                .context(error::ExecuteSnafu)?
+            {
+                for v in ts_list.items() {
+                    self.timestamp.push(v.as_timestamp());
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    fn evaluate(&self) -> Result<Value> {
+        let mut work_vec: Vec<(&Option<f64>, &Option<f64>, &Option<Timestamp>)> = self
+            .lat
+            .iter()
+            .zip(self.lng.iter())
+            .zip(self.timestamp.iter())
+            .map(|((a, b), c)| (a, b, c))
+            .collect();
+
+        // sort by timestamp, we treat null timestamp as 0
+        work_vec.sort_unstable_by_key(|tuple| tuple.2.unwrap_or_else(|| Timestamp::new_second(0)));
+
+        let result = serde_json::to_string(
+            &work_vec
+                .into_iter()
+                // note that we transform to lng,lat for geojson compatibility
+                .map(|(lat, lng, _)| vec![lng, lat])
+                .collect::<Vec<Vec<&Option<f64>>>>(),
+        )
+        .map_err(|e| {
+            BoxedError::new(PlainError::new(
+                format!("Serialization failure: {}", e),
+                StatusCode::EngineExecuteQuery,
+            ))
+        })
+        .context(error::ExecuteSnafu)?;
+
+        Ok(Value::String(result.into()))
+    }
+}
+
+/// This function accepts rows of lat, lng and timestamp, sorts them by timestamp
+/// and encodes them into a geojson-like path.
+///
+/// Example:
+///
+/// ```sql
+/// SELECT json_encode_path(lat, lon, timestamp) FROM table [group by ...];
+/// ```
+///
+#[as_aggr_func_creator]
+#[derive(Debug, Default, AggrFuncTypeStore)]
+pub struct JsonPathEncodeFunctionCreator {}
+
+impl AggregateFunctionCreator for JsonPathEncodeFunctionCreator {
+    fn creator(&self) -> AccumulatorCreatorFunction {
+        let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
+            // Fail with an error instead of panicking on the index below if the arity is wrong.
+            ensure!(types.len() == 3, InvalidInputStateSnafu);
+            Ok(Box::new(JsonPathAccumulator::new(types[2].clone())))
+        });
+        creator
+    }
+
+    fn output_type(&self) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::string_datatype())
+    }
+
+    fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
+        let input_types = self.input_types()?;
+        ensure!(input_types.len() == 3, InvalidInputStateSnafu);
+
+        let timestamp_type = input_types[2].clone();
+
+        Ok(vec![
+            ConcreteDataType::list_datatype(ConcreteDataType::float64_datatype()),
+            ConcreteDataType::list_datatype(ConcreteDataType::float64_datatype()),
+            ConcreteDataType::list_datatype(timestamp_type),
+        ])
+    }
+}
--- a/src/common/function/src/scalars/geo/geohash.rs
+++ b/src/common/function/src/scalars/geo/geohash.rs
@@ -20,23 +20,69 @@ use common_query::error::{self, InvalidFuncArgsSnafu, Result};
 use common_query::prelude::{Signature, TypeSignature};
 use datafusion::logical_expr::Volatility;
 use datatypes::prelude::ConcreteDataType;
-use datatypes::scalars::ScalarVectorBuilder;
-use datatypes::value::Value;
-use datatypes::vectors::{MutableVector, StringVectorBuilder, VectorRef};
+use datatypes::scalars::{Scalar, ScalarVectorBuilder};
+use datatypes::value::{ListValue, Value};
+use datatypes::vectors::{ListVectorBuilder, MutableVector, StringVectorBuilder, VectorRef};
 use geohash::Coord;
 use snafu::{ensure, ResultExt};

 use crate::function::{Function, FunctionContext};

+macro_rules! ensure_resolution_usize {
+    ($v: ident) => {
+        if !($v > 0 && $v <= 12) {
+            Err(BoxedError::new(PlainError::new(
+                format!("Invalid geohash resolution {}, expect value: [1, 12]", $v),
+                StatusCode::EngineExecuteQuery,
+            )))
+            .context(error::ExecuteSnafu)
+        } else {
+            Ok($v as usize)
+        }
+    };
+}
+
+/// Converts an integer [`Value`] into a geohash resolution.
+///
+/// Callers in this file pass the result as the length argument of
+/// `geohash::encode`; only values in `[1, 12]` are accepted.
+///
+/// # Errors
+///
+/// Returns an execution error, rather than panicking, when the value is out of
+/// range or is not an integer at all (e.g. a `NULL` entry in the resolution
+/// column).
+fn try_into_resolution(v: Value) -> Result<usize> {
+    match v {
+        Value::Int8(v) => ensure_resolution_usize!(v),
+        Value::Int16(v) => ensure_resolution_usize!(v),
+        Value::Int32(v) => ensure_resolution_usize!(v),
+        Value::Int64(v) => ensure_resolution_usize!(v),
+        Value::UInt8(v) => ensure_resolution_usize!(v),
+        Value::UInt16(v) => ensure_resolution_usize!(v),
+        Value::UInt32(v) => ensure_resolution_usize!(v),
+        Value::UInt64(v) => ensure_resolution_usize!(v),
+        // The signatures only admit integer columns, but an individual row can
+        // still be NULL, so this arm is reachable and must not panic.
+        _ => Err(BoxedError::new(PlainError::new(
+            "Invalid geohash resolution, expect an integer value: [1, 12]".to_string(),
+            StatusCode::EngineExecuteQuery,
+        )))
+        .context(error::ExecuteSnafu),
+    }
+}
+
 /// Function that return geohash string for a given geospatial coordinate.
 #[derive(Clone, Debug, Default)]
 pub struct GeohashFunction;

-const NAME: &str = "geohash";
+impl GeohashFunction {
+    const NAME: &'static str = "geohash";
+}

 impl Function for GeohashFunction {
    fn name(&self) -> &str {
-        NAME
+        Self::NAME
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
@@ -93,17 +139,7 @@ impl Function for GeohashFunction {
        for i in 0..size {
            let lat = lat_vec.get(i).as_f64_lossy();
            let lon = lon_vec.get(i).as_f64_lossy();
-            let r = match resolution_vec.get(i) {
-                Value::Int8(v) => v as usize,
-                Value::Int16(v) => v as usize,
-                Value::Int32(v) => v as usize,
-                Value::Int64(v) => v as usize,
-                Value::UInt8(v) => v as usize,
-                Value::UInt16(v) => v as usize,
-                Value::UInt32(v) => v as usize,
-                Value::UInt64(v) => v as usize,
-                _ => unreachable!(),
-            };
+            let r = try_into_resolution(resolution_vec.get(i))?;

            let result = match (lat, lon) {
                (Some(lat), Some(lon)) => {
@@ -130,6 +166,134 @@ impl Function for GeohashFunction {

 impl fmt::Display for GeohashFunction {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{}", NAME)
+        write!(f, "{}", Self::NAME)
+    }
+}
+
+/// Function that returns the geohash strings of the eight cells neighbouring a given coordinate.
+#[derive(Clone, Debug, Default)]
+pub struct GeohashNeighboursFunction;
+
+impl GeohashNeighboursFunction {
+    const NAME: &'static str = "geohash_neighbours";
+}
+
+impl Function for GeohashNeighboursFunction {
+    fn name(&self) -> &str {
+        GeohashNeighboursFunction::NAME
+    }
+
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::list_datatype(
+            ConcreteDataType::string_datatype(),
+        ))
+    }
+
+    fn signature(&self) -> Signature {
+        let mut signatures = Vec::new();
+        for coord_type in &[
+            ConcreteDataType::float32_datatype(),
+            ConcreteDataType::float64_datatype(),
+        ] {
+            for resolution_type in &[
+                ConcreteDataType::int8_datatype(),
+                ConcreteDataType::int16_datatype(),
+                ConcreteDataType::int32_datatype(),
+                ConcreteDataType::int64_datatype(),
+                ConcreteDataType::uint8_datatype(),
+                ConcreteDataType::uint16_datatype(),
+                ConcreteDataType::uint32_datatype(),
+                ConcreteDataType::uint64_datatype(),
+            ] {
+                signatures.push(TypeSignature::Exact(vec![
+                    // latitude
+                    coord_type.clone(),
+                    // longitude
+                    coord_type.clone(),
+                    // resolution
+                    resolution_type.clone(),
+                ]));
+            }
+        }
+        Signature::one_of(signatures, Volatility::Stable)
+    }
+
+    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        ensure!(
+            columns.len() == 3,
+            InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The length of the args is not correct, expect 3, provided : {}",
+                    columns.len()
+                ),
+            }
+        );
+
+        let lat_vec = &columns[0];
+        let lon_vec = &columns[1];
+        let resolution_vec = &columns[2];
+
+        let size = lat_vec.len();
+        let mut results =
+            ListVectorBuilder::with_type_capacity(ConcreteDataType::string_datatype(), size);
+
+        for i in 0..size {
+            let lat = lat_vec.get(i).as_f64_lossy();
+            let lon = lon_vec.get(i).as_f64_lossy();
+            let r = try_into_resolution(resolution_vec.get(i))?;
+
+            let result = match (lat, lon) {
+                (Some(lat), Some(lon)) => {
+                    let coord = Coord { x: lon, y: lat };
+                    let encoded = geohash::encode(coord, r)
+                        .map_err(|e| {
+                            BoxedError::new(PlainError::new(
+                                format!("Geohash error: {}", e),
+                                StatusCode::EngineExecuteQuery,
+                            ))
+                        })
+                        .context(error::ExecuteSnafu)?;
+                    let neighbours = geohash::neighbors(&encoded)
+                        .map_err(|e| {
+                            BoxedError::new(PlainError::new(
+                                format!("Geohash error: {}", e),
+                                StatusCode::EngineExecuteQuery,
+                            ))
+                        })
+                        .context(error::ExecuteSnafu)?;
+                    Some(ListValue::new(
+                        vec![
+                            neighbours.n,
+                            neighbours.nw,
+                            neighbours.w,
+                            neighbours.sw,
+                            neighbours.s,
+                            neighbours.se,
+                            neighbours.e,
+                            neighbours.ne,
+                        ]
+                        .into_iter()
+                        .map(Value::from)
+                        .collect(),
+                        ConcreteDataType::string_datatype(),
+                    ))
+                }
+                _ => None,
+            };
+
+            if let Some(list_value) = result {
+                results.push(Some(list_value.as_scalar_ref()));
+            } else {
+                results.push(None);
+            }
+        }
+
+        Ok(results.to_vector())
+    }
+}
+
+impl fmt::Display for GeohashNeighboursFunction {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", GeohashNeighboursFunction::NAME)
    }
 }
--- a/src/common/function/src/scalars/geo/h3.rs
+++ b/src/common/function/src/scalars/geo/h3.rs
--- a/src/common/function/src/scalars/geo/helpers.rs
+++ b/src/common/function/src/scalars/geo/helpers.rs
@@ -0,0 +1,75 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/// Ensures that every input column has the same length, returning an
+/// `InvalidFuncArgs` error from the enclosing function otherwise.
+///
+/// Two forms are accepted:
+/// - `ensure_columns_len!(columns)`: `columns` is a slice of columns; adjacent
+///   pairs are compared, so empty and single-element slices always pass.
+/// - `ensure_columns_len!(a, b, ...)`: two or more individual columns, each
+///   compared against the first one. A trailing comma is allowed.
+macro_rules! ensure_columns_len {
+    ($columns:ident) => {
+        snafu::ensure!(
+            $columns.windows(2).all(|c| c[0].len() == c[1].len()),
+            common_query::error::InvalidFuncArgsSnafu {
+                err_msg: "The length of input columns are in different size"
+            }
+        )
+    };
+    // The separator is placed inside the repetition so the two-column form
+    // `ensure_columns_len!(a, b)` matches without a dangling comma.
+    ($column_a:ident, $column_b:ident $(, $column_n:ident)* $(,)?) => {
+        snafu::ensure!(
+            $column_a.len() == $column_b.len()
+                $(&& $column_a.len() == $column_n.len())*,
+            common_query::error::InvalidFuncArgsSnafu {
+                err_msg: "The length of input columns are in different size"
+            }
+        )
+    };
+}
+
+pub(super) use ensure_columns_len;
+
+/// Ensures that `$columns` holds exactly `$n` columns and, when `$n` is
+/// greater than one, that all of them have the same length.
+///
+/// On failure the enclosing function returns an `InvalidFuncArgs` error.
+/// The expansion invokes `ensure_columns_len!`, so that macro must also be
+/// in scope at the call site.
+macro_rules! ensure_columns_n {
+    ($columns:ident, $n:literal) => {
+        snafu::ensure!(
+            $columns.len() == $n,
+            common_query::error::InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The length of arguments is not correct, expect {}, provided : {}",
+                    stringify!($n),
+                    $columns.len()
+                ),
+            }
+        );
+
+        // A single column has nothing to be compared against.
+        if $n > 1 {
+            ensure_columns_len!($columns);
+        }
+    };
+}
+
+pub(super) use ensure_columns_n;
+
+/// Checks `$compare` and, when it holds, evaluates to `Ok($coerce)`.
+///
+/// When the check fails, `snafu::ensure!` makes the enclosing function return
+/// an `InvalidFuncArgs` error, so this macro can only be used inside functions
+/// whose error type accepts that selector.
+macro_rules! ensure_and_coerce {
+    ($compare:expr, $coerce:expr) => {{
+        snafu::ensure!(
+            $compare,
+            common_query::error::InvalidFuncArgsSnafu {
+                err_msg: "Argument was outside of acceptable range "
+            }
+        );
+        Ok($coerce)
+    }};
+}
+
+pub(super) use ensure_and_coerce;
--- a/src/common/function/src/scalars/geo/s2.rs
+++ b/src/common/function/src/scalars/geo/s2.rs
@@ -0,0 +1,275 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use common_query::error::{InvalidFuncArgsSnafu, Result};
+use common_query::prelude::{Signature, TypeSignature};
+use datafusion::logical_expr::Volatility;
+use datatypes::prelude::ConcreteDataType;
+use datatypes::scalars::ScalarVectorBuilder;
+use datatypes::value::Value;
+use datatypes::vectors::{MutableVector, StringVectorBuilder, UInt64VectorBuilder, VectorRef};
+use derive_more::Display;
+use once_cell::sync::Lazy;
+use s2::cellid::{CellID, MAX_LEVEL};
+use s2::latlng::LatLng;
+use snafu::ensure;
+
+use crate::function::{Function, FunctionContext};
+use crate::scalars::geo::helpers::{ensure_and_coerce, ensure_columns_len, ensure_columns_n};
+
+/// Data types accepted for an s2 cell id argument (see `signature_of_cell`).
+static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
+    vec![
+        ConcreteDataType::int64_datatype(),
+        ConcreteDataType::uint64_datatype(),
+    ]
+});
+
+/// Data types accepted for latitude/longitude arguments.
+static COORDINATE_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
+    vec![
+        ConcreteDataType::float32_datatype(),
+        ConcreteDataType::float64_datatype(),
+    ]
+});
+
+/// Data types accepted for a cell level argument: every signed and unsigned
+/// integer width (see `signature_of_cell_and_level`).
+static LEVEL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
+    vec![
+        ConcreteDataType::int8_datatype(),
+        ConcreteDataType::int16_datatype(),
+        ConcreteDataType::int32_datatype(),
+        ConcreteDataType::int64_datatype(),
+        ConcreteDataType::uint8_datatype(),
+        ConcreteDataType::uint16_datatype(),
+        ConcreteDataType::uint32_datatype(),
+        ConcreteDataType::uint64_datatype(),
+    ]
+});
+
+/// Function that returns [s2] encoding cellid for a given geospatial coordinate.
+///
+/// Takes a latitude column and a longitude column (in degrees) and produces
+/// the `u64` cell id for each row. Rows where either coordinate cannot be
+/// read as a float yield NULL; coordinates rejected by `LatLng::is_valid`
+/// fail the whole evaluation with an invalid-arguments error.
+///
+/// [s2]: http://s2geometry.io
+#[derive(Clone, Debug, Default, Display)]
+#[display("{}", self.name())]
+pub struct S2LatLngToCell;
+
+impl Function for S2LatLngToCell {
+    fn name(&self) -> &str {
+        "s2_latlng_to_cell"
+    }
+
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::uint64_datatype())
+    }
+
+    fn signature(&self) -> Signature {
+        // One exact signature per coordinate type: (latitude, longitude).
+        let signatures = COORDINATE_TYPES
+            .iter()
+            .map(|coord_type| TypeSignature::Exact(vec![coord_type.clone(), coord_type.clone()]))
+            .collect::<Vec<_>>();
+        Signature::one_of(signatures, Volatility::Stable)
+    }
+
+    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        ensure_columns_n!(columns, 2);
+
+        let (latitudes, longitudes) = (&columns[0], &columns[1]);
+        let row_count = latitudes.len();
+        let mut builder = UInt64VectorBuilder::with_capacity(row_count);
+
+        for row in 0..row_count {
+            let latitude = latitudes.get(row).as_f64_lossy();
+            let longitude = longitudes.get(row).as_f64_lossy();
+
+            if let (Some(latitude), Some(longitude)) = (latitude, longitude) {
+                let point = LatLng::from_degrees(latitude, longitude);
+                ensure!(
+                    point.is_valid(),
+                    InvalidFuncArgsSnafu {
+                        err_msg: "The input coordinates are invalid",
+                    }
+                );
+                // The cell id is the raw `u64` wrapped by `CellID`.
+                builder.push(Some(CellID::from(point).0));
+            } else {
+                builder.push(None);
+            }
+        }
+
+        Ok(builder.to_vector())
+    }
+}
+
+/// Returns the level of each s2 cell in the input column.
+///
+/// Rows whose value is not a 64-bit integer cell id yield NULL.
+#[derive(Clone, Debug, Default, Display)]
+#[display("{}", self.name())]
+pub struct S2CellLevel;
+
+impl Function for S2CellLevel {
+    fn name(&self) -> &str {
+        "s2_cell_level"
+    }
+
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::uint64_datatype())
+    }
+
+    fn signature(&self) -> Signature {
+        signature_of_cell()
+    }
+
+    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        ensure_columns_n!(columns, 1);
+
+        let cells = &columns[0];
+        let row_count = cells.len();
+        let mut builder = UInt64VectorBuilder::with_capacity(row_count);
+
+        for row in 0..row_count {
+            let level = cell_from_value(cells.get(row)).map(|cell| cell.level());
+            builder.push(level);
+        }
+
+        Ok(builder.to_vector())
+    }
+}
+
+/// Returns the string token representation of each s2 cell in the input column.
+///
+/// Rows whose value is not a 64-bit integer cell id yield NULL.
+#[derive(Clone, Debug, Default, Display)]
+#[display("{}", self.name())]
+pub struct S2CellToToken;
+
+impl Function for S2CellToToken {
+    fn name(&self) -> &str {
+        "s2_cell_to_token"
+    }
+
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::string_datatype())
+    }
+
+    fn signature(&self) -> Signature {
+        signature_of_cell()
+    }
+
+    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        ensure_columns_n!(columns, 1);
+
+        let cells = &columns[0];
+        let row_count = cells.len();
+        let mut builder = StringVectorBuilder::with_capacity(row_count);
+
+        for row in 0..row_count {
+            // Keep the owned token alive while the builder copies from the borrow.
+            let token = cell_from_value(cells.get(row)).map(|cell| cell.to_token());
+            builder.push(token.as_deref());
+        }
+
+        Ok(builder.to_vector())
+    }
+}
+
+/// Returns the parent, at the given level, of each s2 cell.
+///
+/// Takes a cell id column and a level column. A row yields NULL when either
+/// the cell id or the level is NULL. A non-NULL level outside
+/// `0..=MAX_LEVEL` fails the whole evaluation with an invalid-arguments error.
+#[derive(Clone, Debug, Default, Display)]
+#[display("{}", self.name())]
+pub struct S2CellParent;
+
+impl Function for S2CellParent {
+    fn name(&self) -> &str {
+        "s2_cell_parent"
+    }
+
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::uint64_datatype())
+    }
+
+    fn signature(&self) -> Signature {
+        signature_of_cell_and_level()
+    }
+
+    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        ensure_columns_n!(columns, 2);
+
+        let cell_vec = &columns[0];
+        let level_vec = &columns[1];
+        let size = cell_vec.len();
+        let mut results = UInt64VectorBuilder::with_capacity(size);
+
+        for i in 0..size {
+            let cell = cell_from_value(cell_vec.get(i));
+            let level_value = level_vec.get(i);
+
+            // `value_to_level` only handles integer variants and panics on
+            // anything else, so a NULL level must be mapped to a NULL result
+            // before it gets there.
+            let result = if matches!(level_value, Value::Null) {
+                None
+            } else {
+                let level = value_to_level(level_value)?;
+                cell.map(|cell| cell.parent(level).0)
+            };
+
+            results.push(result);
+        }
+
+        Ok(results.to_vector())
+    }
+}
+
+/// Builds the signature of functions taking a single cell id argument:
+/// one exact signature per entry of `CELL_TYPES`.
+fn signature_of_cell() -> Signature {
+    let signatures = CELL_TYPES
+        .iter()
+        .map(|cell_type| TypeSignature::Exact(vec![cell_type.clone()]))
+        .collect::<Vec<_>>();
+
+    Signature::one_of(signatures, Volatility::Stable)
+}
+
+/// Builds the signature of functions taking `(cell id, level)` arguments:
+/// one exact signature for every combination of `CELL_TYPES` and `LEVEL_TYPES`.
+fn signature_of_cell_and_level() -> Signature {
+    let signatures = CELL_TYPES
+        .iter()
+        .flat_map(|cell_type| {
+            LEVEL_TYPES
+                .iter()
+                .map(move |level_type| {
+                    TypeSignature::Exact(vec![cell_type.clone(), level_type.clone()])
+                })
+        })
+        .collect::<Vec<_>>();
+    Signature::one_of(signatures, Volatility::Stable)
+}
+
+/// Interprets a 64-bit integer value as an s2 cell id.
+///
+/// Signed values are reinterpreted bit-for-bit as `u64`. Any other variant
+/// (including NULL) yields `None`.
+fn cell_from_value(v: Value) -> Option<CellID> {
+    let raw_id = match v {
+        Value::Int64(signed) => signed as u64,
+        Value::UInt64(unsigned) => unsigned,
+        _ => return None,
+    };
+    Some(CellID(raw_id))
+}
+
+/// Converts an integer value into an s2 cell level.
+///
+/// # Errors
+///
+/// Returns an invalid-arguments error (via `ensure_and_coerce!`) when the
+/// value is negative or greater than `MAX_LEVEL`.
+///
+/// # Panics
+///
+/// Panics through `unreachable!()` for any non-integer variant, so callers
+/// must only pass the integer types listed in `LEVEL_TYPES`.
+fn value_to_level(v: Value) -> Result<u64> {
+    match v {
+        Value::Int8(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i8, v as u64),
+        Value::Int16(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i16, v as u64),
+        Value::Int32(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i32, v as u64),
+        Value::Int64(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i64, v as u64),
+        Value::UInt8(v) => ensure_and_coerce!(v <= MAX_LEVEL as u8, v as u64),
+        Value::UInt16(v) => ensure_and_coerce!(v <= MAX_LEVEL as u16, v as u64),
+        Value::UInt32(v) => ensure_and_coerce!(v <= MAX_LEVEL as u32, v as u64),
+        Value::UInt64(v) => ensure_and_coerce!(v <= MAX_LEVEL, v),
+        _ => unreachable!(),
+    }
+}
--- a/src/common/function/src/scalars/json.rs
+++ b/src/common/function/src/scalars/json.rs
@@ -15,6 +15,7 @@
 use std::sync::Arc;
 mod json_get;
 mod json_is;
+mod json_path_exists;
 mod json_to_string;
 mod parse_json;

@@ -46,5 +47,7 @@ impl JsonFunction {
        registry.register(Arc::new(JsonIsBool));
        registry.register(Arc::new(JsonIsArray));
        registry.register(Arc::new(JsonIsObject));
+
+        registry.register(Arc::new(json_path_exists::JsonPathExistsFunction));
    }
 }
--- a/src/common/function/src/scalars/json/json_path_exists.rs
+++ b/src/common/function/src/scalars/json/json_path_exists.rs
@@ -0,0 +1,172 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::{self, Display};
+
+use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
+use common_query::prelude::Signature;
+use datafusion::logical_expr::Volatility;
+use datatypes::data_type::ConcreteDataType;
+use datatypes::prelude::VectorRef;
+use datatypes::scalars::ScalarVectorBuilder;
+use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
+use snafu::ensure;
+
+use crate::function::{Function, FunctionContext};
+
+/// Check if the given JSON data contains the given JSON path.
+#[derive(Clone, Debug, Default)]
+pub struct JsonPathExistsFunction;
+
+/// The name under which the function is exposed; also used in error reports.
+const NAME: &str = "json_path_exists";
+
+impl Function for JsonPathExistsFunction {
+    fn name(&self) -> &str {
+        NAME
+    }
+
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::boolean_datatype())
+    }
+
+    fn signature(&self) -> Signature {
+        let argument_types = vec![
+            ConcreteDataType::json_datatype(),
+            ConcreteDataType::string_datatype(),
+        ];
+        Signature::exact(argument_types, Volatility::Immutable)
+    }
+
+    /// Evaluates `json_path_exists(json, path)` row by row.
+    ///
+    /// A row yields NULL when either input is NULL, when the path cannot be
+    /// parsed, or when the lookup itself reports an error.
+    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        ensure!(
+            columns.len() == 2,
+            InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The length of the args is not correct, expect exactly two, have: {}",
+                    columns.len()
+                ),
+            }
+        );
+        let (jsons, paths) = (&columns[0], &columns[1]);
+
+        // JSON data type uses binary vector; reject everything else up front.
+        if !matches!(jsons.data_type(), ConcreteDataType::Binary(_)) {
+            return UnsupportedInputDataTypeSnafu {
+                function: NAME,
+                datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
+            }
+            .fail();
+        }
+
+        let row_count = jsons.len();
+        let mut builder = BooleanVectorBuilder::with_capacity(row_count);
+
+        for row in 0..row_count {
+            let json_ref = jsons.get_ref(row);
+            let path_ref = paths.get_ref(row);
+
+            let exists = match (json_ref.as_binary(), path_ref.as_string()) {
+                (Ok(Some(json)), Ok(Some(path))) => {
+                    jsonb::jsonpath::parse_json_path(path.as_bytes())
+                        .ok()
+                        .and_then(|json_path| jsonb::path_exists(json, json_path).ok())
+                }
+                _ => None,
+            };
+
+            builder.push(exists);
+        }
+
+        Ok(builder.to_vector())
+    }
+}
+
+impl Display for JsonPathExistsFunction {
+    /// Writes the upper-case SQL spelling of the function name.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str("JSON_PATH_EXISTS")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use common_query::prelude::TypeSignature;
+    use datatypes::scalars::ScalarVector;
+    use datatypes::vectors::{BinaryVector, StringVector};
+
+    use super::*;
+
+    /// Checks the function's metadata (name, return type, signature) and then
+    /// evaluates it over four JSON documents paired with four paths.
+    #[test]
+    fn test_json_path_exists_function() {
+        let json_path_exists = JsonPathExistsFunction;
+
+        assert_eq!("json_path_exists", json_path_exists.name());
+        assert_eq!(
+            ConcreteDataType::boolean_datatype(),
+            json_path_exists
+                .return_type(&[ConcreteDataType::json_datatype()])
+                .unwrap()
+        );
+
+        assert!(matches!(json_path_exists.signature(),
+                         Signature {
+                             type_signature: TypeSignature::Exact(valid_types),
+                             volatility: Volatility::Immutable
+                         } if  valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()]
+        ));
+
+        // Row i of `json_strings` is queried with `paths[i]` and must produce `results[i]`.
+        let json_strings = [
+            r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
+            r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
+            r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
+            r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
+        ];
+        let paths = vec!["$.a.b.c", "$.b", "$.c.a", ".d"];
+        let results = [false, true, true, false];
+
+        // The function operates on the binary JSONB encoding, not on JSON text.
+        let jsonbs = json_strings
+            .iter()
+            .map(|s| {
+                let value = jsonb::parse_value(s.as_bytes()).unwrap();
+                value.to_vec()
+            })
+            .collect::<Vec<_>>();
+
+        let json_vector = BinaryVector::from_vec(jsonbs);
+        let path_vector = StringVector::from_vec(paths);
+        let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
+        let vector = json_path_exists
+            .eval(FunctionContext::default(), &args)
+            .unwrap();
+
+        assert_eq!(4, vector.len());
+        for (i, gt) in results.iter().enumerate() {
+            let result = vector.get_ref(i);
+            let result = result.as_boolean().unwrap().unwrap();
+            assert_eq!(*gt, result);
+        }
+    }
+}
--- a/src/common/function/src/table/migrate_region.rs
+++ b/src/common/function/src/table/migrate_region.rs
@@ -25,13 +25,13 @@ use session::context::QueryContextRef;
 use crate::handlers::ProcedureServiceHandlerRef;
 use crate::helper::cast_u64;

-const DEFAULT_REPLAY_TIMEOUT_SECS: u64 = 10;
+const DEFAULT_TIMEOUT_SECS: u64 = 30;

 /// A function to migrate a region from source peer to target peer.
 /// Returns the submitted procedure id if success. Only available in cluster mode.
 ///
-/// - `migrate_region(region_id, from_peer, to_peer)`, with default replay WAL timeout(10 seconds).
-/// - `migrate_region(region_id, from_peer, to_peer, timeout(secs))`
+/// - `migrate_region(region_id, from_peer, to_peer)`, with timeout(30 seconds).
+/// - `migrate_region(region_id, from_peer, to_peer, timeout(secs))`.
 ///
 /// The parameters:
 /// - `region_id`:  the region id
@@ -48,18 +48,13 @@ pub(crate) async fn migrate_region(
    _ctx: &QueryContextRef,
    params: &[ValueRef<'_>],
 ) -> Result<Value> {
-    let (region_id, from_peer, to_peer, replay_timeout) = match params.len() {
+    let (region_id, from_peer, to_peer, timeout) = match params.len() {
        3 => {
            let region_id = cast_u64(&params[0])?;
            let from_peer = cast_u64(&params[1])?;
            let to_peer = cast_u64(&params[2])?;

-            (
-                region_id,
-                from_peer,
-                to_peer,
-                Some(DEFAULT_REPLAY_TIMEOUT_SECS),
-            )
+            (region_id, from_peer, to_peer, Some(DEFAULT_TIMEOUT_SECS))
        }

        4 => {
@@ -82,14 +77,14 @@ pub(crate) async fn migrate_region(
        }
    };

-    match (region_id, from_peer, to_peer, replay_timeout) {
-        (Some(region_id), Some(from_peer), Some(to_peer), Some(replay_timeout)) => {
+    match (region_id, from_peer, to_peer, timeout) {
+        (Some(region_id), Some(from_peer), Some(to_peer), Some(timeout)) => {
            let pid = procedure_service_handler
                .migrate_region(MigrateRegionRequest {
                    region_id,
                    from_peer,
                    to_peer,
-                    replay_timeout: Duration::from_secs(replay_timeout),
+                    timeout: Duration::from_secs(timeout),
                })
                .await?;

--- a/src/common/grpc/src/select.rs
+++ b/src/common/grpc/src/select.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use api::helper::{convert_i128_to_interval, convert_to_pb_decimal128};
+use api::helper::{convert_month_day_nano_to_pb, convert_to_pb_decimal128};
 use api::v1::column::Values;
 use common_base::BitVec;
 use datatypes::types::{IntervalType, TimeType, TimestampType, WrapperType};
@@ -211,7 +211,7 @@ pub fn values(arrays: &[VectorRef]) -> Result<Values> {
            ConcreteDataType::Interval(IntervalType::MonthDayNano(_)),
            IntervalMonthDayNanoVector,
            interval_month_day_nano_values,
-            |x| { convert_i128_to_interval(x.into_native()) }
+            |x| { convert_month_day_nano_to_pb(x) }
        ),
        (
            ConcreteDataType::Decimal128(_),
--- a/src/common/macro/src/aggr_func.rs
+++ b/src/common/macro/src/aggr_func.rs
@@ -21,23 +21,19 @@ use syn::{parse_macro_input, DeriveInput, ItemStruct};
 pub(crate) fn impl_aggr_func_type_store(ast: &DeriveInput) -> TokenStream {
    let name = &ast.ident;
    let gen = quote! {
-        use common_query::logical_plan::accumulator::AggrFuncTypeStore;
-        use common_query::error::{InvalidInputStateSnafu, Error as QueryError};
-        use datatypes::prelude::ConcreteDataType;
-
-        impl AggrFuncTypeStore for #name {
-            fn input_types(&self) -> std::result::Result<Vec<ConcreteDataType>, QueryError> {
+        impl common_query::logical_plan::accumulator::AggrFuncTypeStore for #name {
+            fn input_types(&self) -> std::result::Result<Vec<datatypes::prelude::ConcreteDataType>, common_query::error::Error> {
                let input_types = self.input_types.load();
-                snafu::ensure!(input_types.is_some(), InvalidInputStateSnafu);
+                snafu::ensure!(input_types.is_some(), common_query::error::InvalidInputStateSnafu);
                Ok(input_types.as_ref().unwrap().as_ref().clone())
            }

-            fn set_input_types(&self, input_types: Vec<ConcreteDataType>) -> std::result::Result<(), QueryError> {
+            fn set_input_types(&self, input_types: Vec<datatypes::prelude::ConcreteDataType>) -> std::result::Result<(), common_query::error::Error> {
                let old = self.input_types.swap(Some(std::sync::Arc::new(input_types.clone())));
                if let Some(old) = old {
-                    snafu::ensure!(old.len() == input_types.len(), InvalidInputStateSnafu);
+                    snafu::ensure!(old.len() == input_types.len(), common_query::error::InvalidInputStateSnafu);
                    for (x, y) in old.iter().zip(input_types.iter()) {
-                        snafu::ensure!(x == y, InvalidInputStateSnafu);
+                        snafu::ensure!(x == y, common_query::error::InvalidInputStateSnafu);
                    }
                }
                Ok(())
@@ -51,7 +47,7 @@ pub(crate) fn impl_as_aggr_func_creator(_args: TokenStream, input: TokenStream)
    let mut item_struct = parse_macro_input!(input as ItemStruct);
    if let syn::Fields::Named(ref mut fields) = item_struct.fields {
        let result = syn::Field::parse_named.parse2(quote! {
-            input_types: arc_swap::ArcSwapOption<Vec<ConcreteDataType>>
+            input_types: arc_swap::ArcSwapOption<Vec<datatypes::prelude::ConcreteDataType>>
        });
        match result {
            Ok(field) => fields.named.push(field),
--- a/src/common/macro/tests/test_derive.rs
+++ b/src/common/macro/tests/test_derive.rs
@@ -24,5 +24,5 @@ struct Foo {}
 fn test_derive() {
    let _ = Foo::default();
    assert_fields!(Foo: input_types);
-    assert_impl_all!(Foo: std::fmt::Debug, Default, AggrFuncTypeStore);
+    assert_impl_all!(Foo: std::fmt::Debug, Default, common_query::logical_plan::accumulator::AggrFuncTypeStore);
 }
--- a/src/common/meta/src/cluster.rs
+++ b/src/common/meta/src/cluster.rs
@@ -20,6 +20,7 @@ use regex::Regex;
 use serde::{Deserialize, Serialize};
 use snafu::{ensure, OptionExt, ResultExt};

+use crate::datanode::RegionStat;
 use crate::error::{
    DecodeJsonSnafu, EncodeJsonSnafu, Error, FromUtf8Snafu, InvalidNodeInfoKeySnafu,
    InvalidRoleSnafu, ParseNumSnafu, Result,
@@ -47,6 +48,9 @@ pub trait ClusterInfo {
        role: Option<Role>,
    ) -> std::result::Result<Vec<NodeInfo>, Self::Error>;

+    /// List all region stats in the cluster.
+    async fn list_region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error>;
+
    // TODO(jeremy): Other info, like region status, etc.
 }

--- a/src/common/meta/src/datanode.rs
+++ b/src/common/meta/src/datanode.rs
@@ -0,0 +1,413 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashSet;
+use std::str::FromStr;
+
+use api::v1::meta::{HeartbeatRequest, RequestHeader};
+use common_time::util as time_util;
+use lazy_static::lazy_static;
+use regex::Regex;
+use serde::{Deserialize, Serialize};
+use snafu::{ensure, OptionExt, ResultExt};
+use store_api::region_engine::{RegionRole, RegionStatistic};
+use store_api::storage::RegionId;
+use table::metadata::TableId;
+
+use crate::error::Result;
+use crate::{error, ClusterId};
+
+/// Key prefix matched by `DATANODE_LEASE_KEY_PATTERN`.
+pub(crate) const DATANODE_LEASE_PREFIX: &str = "__meta_datanode_lease";
+/// Key prefix matched by `INACTIVE_REGION_KEY_PATTERN`.
+const INACTIVE_REGION_PREFIX: &str = "__meta_inactive_region";
+
+/// Key prefix of datanode stat entries (see `DatanodeStatKey`).
+const DATANODE_STAT_PREFIX: &str = "__meta_datanode_stat";
+
+/// Key in the `extensions` map of `api::v1::meta::RegionStat` under which the
+/// serialized `RegionStatistic` is looked up.
+pub const REGION_STATISTIC_KEY: &str = "__region_statistic";
+
+lazy_static! {
+    // `{prefix}-{number}-{number}`, anchored at both ends.
+    pub(crate) static ref DATANODE_LEASE_KEY_PATTERN: Regex =
+        Regex::new(&format!("^{DATANODE_LEASE_PREFIX}-([0-9]+)-([0-9]+)$")).unwrap();
+    // `{prefix}-{cluster_id}-{node_id}`; parsed by `DatanodeStatKey::from_str`.
+    static ref DATANODE_STAT_KEY_PATTERN: Regex =
+        Regex::new(&format!("^{DATANODE_STAT_PREFIX}-([0-9]+)-([0-9]+)$")).unwrap();
+    // `{prefix}-{number}-{number}-{number}`, anchored at both ends.
+    static ref INACTIVE_REGION_KEY_PATTERN: Regex = Regex::new(&format!(
+        "^{INACTIVE_REGION_PREFIX}-([0-9]+)-([0-9]+)-([0-9]+)$"
+    ))
+    .unwrap();
+}
+
+/// The statistics of a datanode, built from one of its heartbeat requests.
+///
+/// The corresponding storage key is [`DatanodeStatKey`], see [`Stat::stat_key`].
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct Stat {
+    /// The time, in milliseconds, at which this stat was created.
+    pub timestamp_millis: i64,
+    /// The id of the cluster the datanode belongs to.
+    pub cluster_id: ClusterId,
+    /// The datanode id.
+    pub id: u64,
+    /// The datanode address.
+    pub addr: String,
+    /// The read capacity units during this period
+    pub rcus: i64,
+    /// The write capacity units during this period
+    pub wcus: i64,
+    /// How many regions on this node
+    pub region_num: u64,
+    /// The per-region statistics of this node.
+    pub region_stats: Vec<RegionStat>,
+    /// The node epoch is used to check whether the node has restarted or redeployed.
+    pub node_epoch: u64,
+}
+
+/// The statistics of a region.
+///
+/// Converted from `api::v1::meta::RegionStat`; the three size fields are read
+/// from the `RegionStatistic` stored in that message's extensions and default
+/// to zero values when it is absent or cannot be deserialized.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RegionStat {
+    /// The region id.
+    pub id: RegionId,
+    /// The read capacity units during this period
+    pub rcus: i64,
+    /// The write capacity units during this period
+    pub wcus: i64,
+    /// Approximate bytes of this region
+    pub approximate_bytes: i64,
+    /// The engine name.
+    pub engine: String,
+    /// The region role.
+    pub role: RegionRole,
+    /// The size of the memtable in bytes.
+    pub memtable_size: u64,
+    /// The size of the manifest in bytes.
+    pub manifest_size: u64,
+    /// The size of the SST files in bytes.
+    pub sst_size: u64,
+}
+
+impl Stat {
+    /// Returns `true` when the stat carries no region statistics.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.region_stats.is_empty()
+    }
+
+    /// Builds the storage key identifying this datanode's stat.
+    pub fn stat_key(&self) -> DatanodeStatKey {
+        let (cluster_id, node_id) = (self.cluster_id, self.id);
+        DatanodeStatKey {
+            cluster_id,
+            node_id,
+        }
+    }
+
+    /// Returns a tuple array containing [RegionId] and [RegionRole].
+    pub fn regions(&self) -> Vec<(RegionId, RegionRole)> {
+        let mut regions = Vec::with_capacity(self.region_stats.len());
+        for stat in &self.region_stats {
+            regions.push((stat.id, stat.role));
+        }
+        regions
+    }
+
+    /// Returns all table ids in the region stats.
+    pub fn table_ids(&self) -> HashSet<TableId> {
+        let mut table_ids = HashSet::new();
+        for stat in &self.region_stats {
+            table_ids.insert(stat.id.table_id());
+        }
+        table_ids
+    }
+
+    /// Retains the active region stats and updates the rcus, wcus, and region_num.
+    ///
+    /// Nothing is touched when `inactive_region_ids` is empty.
+    pub fn retain_active_region_stats(&mut self, inactive_region_ids: &HashSet<RegionId>) {
+        if !inactive_region_ids.is_empty() {
+            self.region_stats
+                .retain(|stat| !inactive_region_ids.contains(&stat.id));
+
+            // Recompute the aggregates from the surviving regions only.
+            let remaining = &self.region_stats;
+            self.rcus = remaining.iter().map(|stat| stat.rcus).sum();
+            self.wcus = remaining.iter().map(|stat| stat.wcus).sum();
+            self.region_num = remaining.len() as u64;
+        }
+    }
+}
+
+impl TryFrom<&HeartbeatRequest> for Stat {
+    type Error = Option<RequestHeader>;
+
+    /// Builds a [`Stat`] from a heartbeat request.
+    ///
+    /// Both the header and the peer must be present; otherwise a clone of the
+    /// (possibly missing) header is handed back as the error.
+    fn try_from(value: &HeartbeatRequest) -> std::result::Result<Self, Self::Error> {
+        let (header, peer) = match (&value.header, &value.peer) {
+            (Some(header), Some(peer)) => (header, peer),
+            (header, _) => return Err(header.clone()),
+        };
+
+        let region_stats: Vec<RegionStat> =
+            value.region_stats.iter().map(RegionStat::from).collect();
+
+        Ok(Self {
+            timestamp_millis: time_util::current_time_millis(),
+            cluster_id: header.cluster_id,
+            // datanode id
+            id: peer.id,
+            // datanode address
+            addr: peer.addr.clone(),
+            // Node-level units are the sums over all reported regions.
+            rcus: region_stats.iter().map(|stat| stat.rcus).sum(),
+            wcus: region_stats.iter().map(|stat| stat.wcus).sum(),
+            region_num: region_stats.len() as u64,
+            region_stats,
+            node_epoch: value.node_epoch,
+        })
+    }
+}
+
+impl From<&api::v1::meta::RegionStat> for RegionStat {
+    /// Converts the protobuf region stat, reading the size statistics from the
+    /// `REGION_STATISTIC_KEY` extension (defaults are used when it is missing
+    /// or cannot be deserialized).
+    fn from(value: &api::v1::meta::RegionStat) -> Self {
+        let RegionStatistic {
+            memtable_size,
+            manifest_size,
+            sst_size,
+            ..
+        } = value
+            .extensions
+            .get(REGION_STATISTIC_KEY)
+            .and_then(|bytes| RegionStatistic::deserialize_from_slice(bytes))
+            .unwrap_or_default();
+
+        Self {
+            id: RegionId::from_u64(value.region_id),
+            rcus: value.rcus,
+            wcus: value.wcus,
+            approximate_bytes: value.approximate_bytes,
+            engine: value.engine.to_string(),
+            role: RegionRole::from(value.role()),
+            memtable_size,
+            manifest_size,
+            sst_size,
+        }
+    }
+}
+
+/// The key of the datanode stat in the memory store.
+///
+/// The format is `__meta_datanode_stat-{cluster_id}-{node_id}`.
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
+pub struct DatanodeStatKey {
+    /// The id of the cluster the datanode belongs to.
+    pub cluster_id: ClusterId,
+    /// The datanode id.
+    pub node_id: u64,
+}
+
+impl DatanodeStatKey {
+    /// The key prefix shared by every datanode stat key, as bytes.
+    pub fn prefix_key() -> Vec<u8> {
+        let mut prefix = String::from(DATANODE_STAT_PREFIX);
+        prefix.push('-');
+        prefix.into_bytes()
+    }
+
+    /// The key prefix shared by the datanode stat keys of one cluster.
+    pub fn key_prefix_with_cluster_id(cluster_id: ClusterId) -> String {
+        format!("{}-{}-", DATANODE_STAT_PREFIX, cluster_id)
+    }
+}
+
+/// Encodes the key as the bytes of `{DATANODE_STAT_PREFIX}-{cluster_id}-{node_id}`.
+impl From<DatanodeStatKey> for Vec<u8> {
+    fn from(value: DatanodeStatKey) -> Self {
+        let DatanodeStatKey {
+            cluster_id,
+            node_id,
+        } = value;
+        format!("{DATANODE_STAT_PREFIX}-{cluster_id}-{node_id}").into_bytes()
+    }
+}
+
+impl FromStr for DatanodeStatKey {
+    type Err = error::Error;
+
+    /// Parses a key string with `DATANODE_STAT_KEY_PATTERN`.
+    ///
+    /// Fails with `InvalidStatKey` when the pattern does not match (or does not yield
+    /// exactly two capture groups besides the whole match), and with `ParseNum` when
+    /// either captured id is not a valid `u64`.
+    fn from_str(key: &str) -> Result<Self> {
+        let captures = DATANODE_STAT_KEY_PATTERN
+            .captures(key)
+            .context(error::InvalidStatKeySnafu { key })?;
+        ensure!(captures.len() == 3, error::InvalidStatKeySnafu { key });
+
+        // Group 1 is the cluster id, group 2 the node id.
+        let raw_cluster_id = &captures[1];
+        let raw_node_id = &captures[2];
+
+        let cluster_id = raw_cluster_id.parse::<u64>().context(error::ParseNumSnafu {
+            err_msg: format!("invalid cluster_id: {raw_cluster_id}"),
+        })?;
+        let node_id = raw_node_id.parse::<u64>().context(error::ParseNumSnafu {
+            err_msg: format!("invalid node_id: {raw_node_id}"),
+        })?;
+
+        Ok(Self {
+            cluster_id,
+            node_id,
+        })
+    }
+}
+
+impl TryFrom<Vec<u8>> for DatanodeStatKey {
+    type Error = error::Error;
+
+    /// Decodes the bytes as UTF-8 and then parses them through the `FromStr` impl.
+    fn try_from(bytes: Vec<u8>) -> Result<Self> {
+        let key = String::from_utf8(bytes).context(error::FromUtf8Snafu {
+            name: "DatanodeStatKey",
+        })?;
+        key.parse()
+    }
+}
+
+/// The value of the datanode stat in the memory store.
+///
+/// Because of `#[serde(transparent)]` it is (de)serialized exactly like the inner
+/// `Vec<Stat>`, without a wrapping object.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(transparent)]
+pub struct DatanodeStatValue {
+    /// The recorded stats; the accessors on this type treat the last element as the latest.
+    pub stats: Vec<Stat>,
+}
+
+impl DatanodeStatValue {
+    /// Returns the region count of the last stat, or `None` when there are no stats.
+    pub fn region_num(&self) -> Option<u64> {
+        let latest = self.stats.last()?;
+        Some(latest.region_num)
+    }
+
+    /// Returns a copy of the address of the last stat, or `None` when there are no stats.
+    pub fn node_addr(&self) -> Option<String> {
+        let latest = self.stats.last()?;
+        Some(latest.addr.clone())
+    }
+}
+
+impl TryFrom<DatanodeStatValue> for Vec<u8> {
+    type Error = error::Error;
+
+    /// Serializes the stats to JSON and returns the UTF-8 bytes.
+    ///
+    /// Fails with `SerializeToJson` (carrying a debug dump of the value) when
+    /// `serde_json` cannot serialize it.
+    fn try_from(stats: DatanodeStatValue) -> Result<Self> {
+        Ok(serde_json::to_string(&stats)
+            // Build the debug dump lazily so the successful path does not pay for
+            // formatting every stat on each serialization.
+            .with_context(|_| error::SerializeToJsonSnafu {
+                input: format!("{stats:?}"),
+            })?
+            .into_bytes())
+    }
+}
+
+impl FromStr for DatanodeStatValue {
+    type Err = error::Error;
+
+    /// Parses the JSON text; on failure the offending input is attached to the
+    /// `DeserializeFromJson` error.
+    fn from_str(value: &str) -> Result<Self> {
+        let parsed = serde_json::from_str::<Self>(value);
+        parsed.context(error::DeserializeFromJsonSnafu { input: value })
+    }
+}
+
+impl TryFrom<Vec<u8>> for DatanodeStatValue {
+    type Error = error::Error;
+
+    /// Decodes the bytes as UTF-8 and then parses them through the `FromStr` impl.
+    fn try_from(value: Vec<u8>) -> Result<Self> {
+        let json = String::from_utf8(value).context(error::FromUtf8Snafu {
+            name: "DatanodeStatValue",
+        })?;
+        json.parse()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // The stat key takes `cluster_id` from the stat and uses the stat's `id` as `node_id`.
+    #[test]
+    fn test_stat_key() {
+        let stat = Stat {
+            cluster_id: 3,
+            id: 101,
+            region_num: 10,
+            ..Default::default()
+        };
+
+        let stat_key = stat.stat_key();
+
+        assert_eq!(3, stat_key.cluster_id);
+        assert_eq!(101, stat_key.node_id);
+    }
+
+    // A value converted to bytes and back keeps its stats intact.
+    #[test]
+    fn test_stat_val_round_trip() {
+        let stat = Stat {
+            cluster_id: 0,
+            id: 101,
+            region_num: 100,
+            ..Default::default()
+        };
+
+        let stat_val = DatanodeStatValue { stats: vec![stat] };
+
+        let bytes: Vec<u8> = stat_val.try_into().unwrap();
+        let stat_val: DatanodeStatValue = bytes.try_into().unwrap();
+        let stats = stat_val.stats;
+
+        assert_eq!(1, stats.len());
+
+        let stat = stats.first().unwrap();
+        assert_eq!(0, stat.cluster_id);
+        assert_eq!(101, stat.id);
+        assert_eq!(100, stat.region_num);
+    }
+
+    // `node_addr` is `None` without stats and otherwise the address of the last stat.
+    #[test]
+    fn test_get_addr_from_stat_val() {
+        let empty = DatanodeStatValue { stats: vec![] };
+        let addr = empty.node_addr();
+        assert!(addr.is_none());
+
+        let stat_val = DatanodeStatValue {
+            stats: vec![
+                Stat {
+                    addr: "1".to_string(),
+                    ..Default::default()
+                },
+                Stat {
+                    addr: "2".to_string(),
+                    ..Default::default()
+                },
+                Stat {
+                    addr: "3".to_string(),
+                    ..Default::default()
+                },
+            ],
+        };
+        let addr = stat_val.node_addr().unwrap();
+        assert_eq!("3", addr);
+    }
+
+    // `region_num` is `None` without stats and otherwise the count of the last stat.
+    #[test]
+    fn test_get_region_num_from_stat_val() {
+        let empty = DatanodeStatValue { stats: vec![] };
+        let region_num = empty.region_num();
+        assert!(region_num.is_none());
+
+        // A recorded count of zero is still reported as `Some(0)`, not as missing.
+        let wrong = DatanodeStatValue {
+            stats: vec![Stat {
+                region_num: 0,
+                ..Default::default()
+            }],
+        };
+        let right = wrong.region_num();
+        assert_eq!(Some(0), right);
+
+        // Only the last entry counts, regardless of the earlier values.
+        let stat_val = DatanodeStatValue {
+            stats: vec![
+                Stat {
+                    region_num: 1,
+                    ..Default::default()
+                },
+                Stat {
+                    region_num: 0,
+                    ..Default::default()
+                },
+                Stat {
+                    region_num: 2,
+                    ..Default::default()
+                },
+            ],
+        };
+        let region_num = stat_val.region_num().unwrap();
+        assert_eq!(2, region_num);
+    }
+}
--- a/src/common/meta/src/ddl.rs
+++ b/src/common/meta/src/ddl.rs
@@ -15,6 +15,7 @@
 use std::collections::HashMap;
 use std::sync::Arc;

+use api::v1::meta::ProcedureDetailResponse;
 use common_telemetry::tracing_context::W3cTrace;
 use store_api::storage::{RegionId, RegionNumber, TableId};

@@ -82,6 +83,8 @@ pub trait ProcedureExecutor: Send + Sync {
        ctx: &ExecutorContext,
        pid: &str,
    ) -> Result<ProcedureStateResponse>;
+
+    async fn list_procedures(&self, ctx: &ExecutorContext) -> Result<ProcedureDetailResponse>;
 }

 pub type ProcedureExecutorRef = Arc<dyn ProcedureExecutor>;
--- a/src/common/meta/src/ddl/alter_table/region_request.rs
+++ b/src/common/meta/src/ddl/alter_table/region_request.rs
@@ -187,7 +187,7 @@ mod tests {
                    region: Region::new_test(region_id),
                    leader_peer: Some(Peer::empty(1)),
                    follower_peers: vec![],
-                    leader_status: None,
+                    leader_state: None,
                    leader_down_since: None,
                }]),
                HashMap::new(),
--- a/src/common/meta/src/ddl/tests/alter_table.rs
+++ b/src/common/meta/src/ddl/tests/alter_table.rs
@@ -107,21 +107,21 @@ async fn test_on_submit_alter_request() {
                    region: Region::new_test(RegionId::new(table_id, 1)),
                    leader_peer: Some(Peer::empty(1)),
                    follower_peers: vec![Peer::empty(5)],
-                    leader_status: None,
+                    leader_state: None,
                    leader_down_since: None,
                },
                RegionRoute {
                    region: Region::new_test(RegionId::new(table_id, 2)),
                    leader_peer: Some(Peer::empty(2)),
                    follower_peers: vec![Peer::empty(4)],
-                    leader_status: None,
+                    leader_state: None,
                    leader_down_since: None,
                },
                RegionRoute {
                    region: Region::new_test(RegionId::new(table_id, 3)),
                    leader_peer: Some(Peer::empty(3)),
                    follower_peers: vec![],
-                    leader_status: None,
+                    leader_state: None,
                    leader_down_since: None,
                },
            ]),
@@ -193,21 +193,21 @@ async fn test_on_submit_alter_request_with_outdated_request() {
                    region: Region::new_test(RegionId::new(table_id, 1)),
                    leader_peer: Some(Peer::empty(1)),
                    follower_peers: vec![Peer::empty(5)],
-                    leader_status: None,
+                    leader_state: None,
                    leader_down_since: None,
                },
                RegionRoute {
                    region: Region::new_test(RegionId::new(table_id, 2)),
                    leader_peer: Some(Peer::empty(2)),
                    follower_peers: vec![Peer::empty(4)],
-                    leader_status: None,
+                    leader_state: None,
                    leader_down_since: None,
                },
                RegionRoute {
                    region: Region::new_test(RegionId::new(table_id, 3)),
                    leader_peer: Some(Peer::empty(3)),
                    follower_peers: vec![],
-                    leader_status: None,
+                    leader_state: None,
                    leader_down_since: None,
                },
            ]),
--- a/src/common/meta/src/ddl/tests/drop_table.rs
+++ b/src/common/meta/src/ddl/tests/drop_table.rs
@@ -119,21 +119,21 @@ async fn test_on_datanode_drop_regions() {
                    region: Region::new_test(RegionId::new(table_id, 1)),
                    leader_peer: Some(Peer::empty(1)),
                    follower_peers: vec![Peer::empty(5)],
-                    leader_status: None,
+                    leader_state: None,
                    leader_down_since: None,
                },
                RegionRoute {
                    region: Region::new_test(RegionId::new(table_id, 2)),
                    leader_peer: Some(Peer::empty(2)),
                    follower_peers: vec![Peer::empty(4)],
-                    leader_status: None,
+                    leader_state: None,
                    leader_down_since: None,
                },
                RegionRoute {
                    region: Region::new_test(RegionId::new(table_id, 3)),
                    leader_peer: Some(Peer::empty(3)),
                    follower_peers: vec![],
-                    leader_status: None,
+                    leader_state: None,
                    leader_down_since: None,
                },
            ]),
--- a/src/common/meta/src/ddl/utils.rs
+++ b/src/common/meta/src/ddl/utils.rs
@@ -18,6 +18,7 @@ use common_procedure::error::Error as ProcedureError;
 use snafu::{ensure, OptionExt, ResultExt};
 use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
 use table::metadata::TableId;
+use table::table_reference::TableReference;

 use crate::ddl::DetectingRegion;
 use crate::error::{Error, OperateDatanodeSnafu, Result, TableNotFoundSnafu, UnsupportedSnafu};
@@ -109,8 +110,8 @@ pub async fn check_and_get_physical_table_id(
        .table_name_manager()
        .get(physical_table_name)
        .await?
-        .context(TableNotFoundSnafu {
-            table_name: physical_table_name.to_string(),
+        .with_context(|| TableNotFoundSnafu {
+            table_name: TableReference::from(physical_table_name).to_string(),
        })
        .map(|table| table.table_id())
 }
@@ -123,8 +124,8 @@ pub async fn get_physical_table_id(
        .table_name_manager()
        .get(logical_table_name)
        .await?
-        .context(TableNotFoundSnafu {
-            table_name: logical_table_name.to_string(),
+        .with_context(|| TableNotFoundSnafu {
+            table_name: TableReference::from(logical_table_name).to_string(),
        })
        .map(|table| table.table_id())?;

--- a/src/common/meta/src/ddl_manager.rs
+++ b/src/common/meta/src/ddl_manager.rs
@@ -14,6 +14,7 @@

 use std::sync::Arc;

+use api::v1::meta::ProcedureDetailResponse;
 use common_procedure::{
    watcher, BoxedProcedureLoader, Output, ProcedureId, ProcedureManagerRef, ProcedureWithId,
 };
@@ -825,6 +826,15 @@ impl ProcedureExecutor for DdlManager {

        Ok(procedure::procedure_state_to_pb_response(&state))
    }
+
+    /// Lists the procedures reported by the procedure manager and converts them into
+    /// the protobuf response; a manager failure is wrapped with `QueryProcedureSnafu`.
+    async fn list_procedures(&self, _ctx: &ExecutorContext) -> Result<ProcedureDetailResponse> {
+        let listed = self.procedure_manager.list_procedures().await;
+        let metas = listed.context(QueryProcedureSnafu)?;
+
+        Ok(procedure::procedure_details_to_pb_response(metas))
+    }
 }

 #[cfg(test)]
--- a/src/common/meta/src/error.rs
+++ b/src/common/meta/src/error.rs
@@ -147,6 +147,20 @@ pub enum Error {
        source: common_procedure::Error,
    },

+    #[snafu(display("Failed to start procedure manager"))]
+    StartProcedureManager {
+        #[snafu(implicit)]
+        location: Location,
+        source: common_procedure::Error,
+    },
+
+    #[snafu(display("Failed to stop procedure manager"))]
+    StopProcedureManager {
+        #[snafu(implicit)]
+        location: Location,
+        source: common_procedure::Error,
+    },
+
    #[snafu(display(
        "Failed to get procedure output, procedure id: {procedure_id}, error: {err_msg}"
    ))]
@@ -218,6 +232,24 @@ pub enum Error {
        error: JsonError,
    },

+    #[snafu(display("Failed to serialize to json: {}", input))]
+    SerializeToJson {
+        input: String,
+        #[snafu(source)]
+        error: serde_json::error::Error,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Failed to deserialize from json: {}", input))]
+    DeserializeFromJson {
+        input: String,
+        #[snafu(source)]
+        error: serde_json::error::Error,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
    #[snafu(display("Payload not exist"))]
    PayloadNotExist {
        #[snafu(implicit)]
@@ -531,13 +563,20 @@ pub enum Error {
        location: Location,
    },

-    #[snafu(display("Invalid  node info key: {}", key))]
+    #[snafu(display("Invalid node info key: {}", key))]
    InvalidNodeInfoKey {
        key: String,
        #[snafu(implicit)]
        location: Location,
    },

+    #[snafu(display("Invalid node stat key: {}", key))]
+    InvalidStatKey {
+        key: String,
+        #[snafu(implicit)]
+        location: Location,
+    },
+
    #[snafu(display("Failed to parse number: {}", err_msg))]
    ParseNum {
        err_msg: String,
@@ -627,7 +666,9 @@ impl ErrorExt for Error {
            | EtcdTxnFailed { .. }
            | ConnectEtcd { .. }
            | MoveValues { .. }
-            | GetCache { .. } => StatusCode::Internal,
+            | GetCache { .. }
+            | SerializeToJson { .. }
+            | DeserializeFromJson { .. } => StatusCode::Internal,

            ValueNotExist { .. } => StatusCode::Unexpected,

@@ -688,7 +729,9 @@ impl ErrorExt for Error {

            SubmitProcedure { source, .. }
            | QueryProcedure { source, .. }
-            | WaitProcedure { source, .. } => source.status_code(),
+            | WaitProcedure { source, .. }
+            | StartProcedureManager { source, .. }
+            | StopProcedureManager { source, .. } => source.status_code(),
            RegisterProcedureLoader { source, .. } => source.status_code(),
            External { source, .. } => source.status_code(),
            OperateDatanode { source, .. } => source.status_code(),
@@ -700,6 +743,7 @@ impl ErrorExt for Error {
            | InvalidNumTopics { .. }
            | SchemaNotFound { .. }
            | InvalidNodeInfoKey { .. }
+            | InvalidStatKey { .. }
            | ParseNum { .. }
            | InvalidRole { .. }
            | EmptyDdlTasks { .. } => StatusCode::InvalidArguments,
--- a/src/common/meta/src/instruction.rs
+++ b/src/common/meta/src/instruction.rs
@@ -132,11 +132,22 @@ impl OpenRegion {
 pub struct DowngradeRegion {
    /// The [RegionId].
    pub region_id: RegionId,
+    /// The timeout of waiting for flush the region.
+    ///
+    /// `None` stands for don't flush before downgrading the region.
+    #[serde(default)]
+    pub flush_timeout: Option<Duration>,
+    /// Rejects all write requests after flushing.
+    pub reject_write: bool,
 }

 impl Display for DowngradeRegion {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(f, "DowngradeRegion(region_id={})", self.region_id)
+        // Each label is spelled exactly like the struct field it prints.
+        write!(
+            f,
+            "DowngradeRegion(region_id={}, flush_timeout={:?}, reject_write={})",
+            self.region_id, self.flush_timeout, self.reject_write
+        )
     }
 }

@@ -152,7 +163,7 @@ pub struct UpgradeRegion {
    /// `None` stands for no wait,
    /// it's helpful to verify whether the leader region is ready.
    #[serde(with = "humantime_serde")]
-    pub wait_for_replay_timeout: Option<Duration>,
+    pub replay_timeout: Option<Duration>,
    /// The hint for replaying memtable.
    #[serde(default)]
    pub location_id: Option<u64>,
--- a/src/common/meta/src/key.rs
+++ b/src/common/meta/src/key.rs
@@ -140,11 +140,11 @@ use crate::key::table_route::TableRouteKey;
 use crate::key::txn_helper::TxnOpGetResponseSet;
 use crate::kv_backend::txn::{Txn, TxnOp};
 use crate::kv_backend::KvBackendRef;
-use crate::rpc::router::{region_distribution, RegionRoute, RegionStatus};
+use crate::rpc::router::{region_distribution, LeaderState, RegionRoute};
 use crate::rpc::store::BatchDeleteRequest;
 use crate::DatanodeId;

-pub const NAME_PATTERN: &str = r"[a-zA-Z_:-][a-zA-Z0-9_:\-\.]*";
+pub const NAME_PATTERN: &str = r"[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*";
 pub const MAINTENANCE_KEY: &str = "__maintenance";

 const DATANODE_TABLE_KEY_PREFIX: &str = "__dn_table";
@@ -1126,14 +1126,14 @@ impl TableMetadataManager {
        next_region_route_status: F,
    ) -> Result<()>
    where
-        F: Fn(&RegionRoute) -> Option<Option<RegionStatus>>,
+        F: Fn(&RegionRoute) -> Option<Option<LeaderState>>,
    {
        let mut new_region_routes = current_table_route_value.region_routes()?.clone();

        let mut updated = 0;
        for route in &mut new_region_routes {
-            if let Some(status) = next_region_route_status(route) {
-                if route.set_leader_status(status) {
+            if let Some(state) = next_region_route_status(route) {
+                if route.set_leader_state(state) {
                    updated += 1;
                }
            }
@@ -1280,7 +1280,7 @@ mod tests {
    use crate::key::{DeserializedValueWithBytes, TableMetadataManager, ViewInfoValue};
    use crate::kv_backend::memory::MemoryKvBackend;
    use crate::peer::Peer;
-    use crate::rpc::router::{region_distribution, Region, RegionRoute, RegionStatus};
+    use crate::rpc::router::{region_distribution, LeaderState, Region, RegionRoute};

    #[test]
    fn test_deserialized_value_with_bytes() {
@@ -1324,7 +1324,7 @@ mod tests {
            },
            leader_peer: Some(Peer::new(datanode, "a2")),
            follower_peers: vec![],
-            leader_status: None,
+            leader_state: None,
            leader_down_since: None,
        }
    }
@@ -1715,7 +1715,7 @@ mod tests {
                    attrs: BTreeMap::new(),
                },
                leader_peer: Some(Peer::new(datanode, "a2")),
-                leader_status: Some(RegionStatus::Downgraded),
+                leader_state: Some(LeaderState::Downgrading),
                follower_peers: vec![],
                leader_down_since: Some(current_time_millis()),
            },
@@ -1727,7 +1727,7 @@ mod tests {
                    attrs: BTreeMap::new(),
                },
                leader_peer: Some(Peer::new(datanode, "a1")),
-                leader_status: None,
+                leader_state: None,
                follower_peers: vec![],
                leader_down_since: None,
            },
@@ -1750,10 +1750,10 @@ mod tests {

        table_metadata_manager
            .update_leader_region_status(table_id, &current_table_route_value, |region_route| {
-                if region_route.leader_status.is_some() {
+                if region_route.leader_state.is_some() {
                    None
                } else {
-                    Some(Some(RegionStatus::Downgraded))
+                    Some(Some(LeaderState::Downgrading))
                }
            })
            .await
@@ -1768,8 +1768,8 @@ mod tests {
            .unwrap();

        assert_eq!(
-            updated_route_value.region_routes().unwrap()[0].leader_status,
-            Some(RegionStatus::Downgraded)
+            updated_route_value.region_routes().unwrap()[0].leader_state,
+            Some(LeaderState::Downgrading)
        );

        assert!(updated_route_value.region_routes().unwrap()[0]
@@ -1777,8 +1777,8 @@ mod tests {
            .is_some());

        assert_eq!(
-            updated_route_value.region_routes().unwrap()[1].leader_status,
-            Some(RegionStatus::Downgraded)
+            updated_route_value.region_routes().unwrap()[1].leader_state,
+            Some(LeaderState::Downgrading)
        );
        assert!(updated_route_value.region_routes().unwrap()[1]
            .leader_down_since
@@ -1943,21 +1943,21 @@ mod tests {
                        region: Region::new_test(RegionId::new(table_id, 1)),
                        leader_peer: Some(Peer::empty(1)),
                        follower_peers: vec![Peer::empty(5)],
-                        leader_status: None,
+                        leader_state: None,
                        leader_down_since: None,
                    },
                    RegionRoute {
                        region: Region::new_test(RegionId::new(table_id, 2)),
                        leader_peer: Some(Peer::empty(2)),
                        follower_peers: vec![Peer::empty(4)],
-                        leader_status: None,
+                        leader_state: None,
                        leader_down_since: None,
                    },
                    RegionRoute {
                        region: Region::new_test(RegionId::new(table_id, 3)),
                        leader_peer: Some(Peer::empty(3)),
                        follower_peers: vec![],
-                        leader_status: None,
+                        leader_state: None,
                        leader_down_since: None,
                    },
                ]),
@@ -1996,21 +1996,21 @@ mod tests {
                        region: Region::new_test(RegionId::new(table_id, 1)),
                        leader_peer: Some(Peer::empty(1)),
                        follower_peers: vec![Peer::empty(5)],
-                        leader_status: None,
+                        leader_state: None,
                        leader_down_since: None,
                    },
                    RegionRoute {
                        region: Region::new_test(RegionId::new(table_id, 2)),
                        leader_peer: Some(Peer::empty(2)),
                        follower_peers: vec![Peer::empty(4)],
-                        leader_status: None,
+                        leader_state: None,
                        leader_down_since: None,
                    },
                    RegionRoute {
                        region: Region::new_test(RegionId::new(table_id, 3)),
                        leader_peer: Some(Peer::empty(3)),
                        follower_peers: vec![],
-                        leader_status: None,
+                        leader_state: None,
                        leader_down_since: None,
                    },
                ]),
--- a/src/common/meta/src/key/table_name.rs
+++ b/src/common/meta/src/key/table_name.rs
@@ -21,6 +21,7 @@ use serde::{Deserialize, Serialize};
 use snafu::OptionExt;
 use table::metadata::TableId;
 use table::table_name::TableName;
+use table::table_reference::TableReference;

 use super::{MetadataKey, MetadataValue, TABLE_NAME_KEY_PATTERN, TABLE_NAME_KEY_PREFIX};
 use crate::error::{Error, InvalidMetadataSnafu, Result};
@@ -122,6 +123,16 @@ impl From<TableNameKey<'_>> for TableName {
    }
 }

+/// Reuses the borrowed catalog, schema and table names of a [`TableNameKey`] as a
+/// [`TableReference`].
+impl<'a> From<TableNameKey<'a>> for TableReference<'a> {
+    fn from(value: TableNameKey<'a>) -> Self {
+        let (catalog, schema, table) = (value.catalog, value.schema, value.table);
+        Self {
+            catalog,
+            schema,
+            table,
+        }
+    }
+}
+
 impl<'a> TryFrom<&'a str> for TableNameKey<'a> {
    type Error = Error;

--- a/src/common/meta/src/key/table_route.rs
+++ b/src/common/meta/src/key/table_route.rs
@@ -744,6 +744,7 @@ mod tests {
    use crate::kv_backend::memory::MemoryKvBackend;
    use crate::kv_backend::{KvBackend, TxnService};
    use crate::peer::Peer;
+    use crate::rpc::router::Region;
    use crate::rpc::store::PutRequest;

    #[test]
@@ -751,11 +752,43 @@ mod tests {
        let old_raw_v = r#"{"region_routes":[{"region":{"id":1,"name":"r1","partition":null,"attrs":{}},"leader_peer":{"id":2,"addr":"a2"},"follower_peers":[]},{"region":{"id":1,"name":"r1","partition":null,"attrs":{}},"leader_peer":{"id":2,"addr":"a2"},"follower_peers":[]}],"version":0}"#;
        let v = TableRouteValue::try_from_raw_value(old_raw_v.as_bytes()).unwrap();

-        let new_raw_v = format!("{:?}", v);
-        assert_eq!(
-            new_raw_v,
-            r#"Physical(PhysicalTableRouteValue { region_routes: [RegionRoute { region: Region { id: 1(0, 1), name: "r1", partition: None, attrs: {} }, leader_peer: Some(Peer { id: 2, addr: "a2" }), follower_peers: [], leader_status: None, leader_down_since: None }, RegionRoute { region: Region { id: 1(0, 1), name: "r1", partition: None, attrs: {} }, leader_peer: Some(Peer { id: 2, addr: "a2" }), follower_peers: [], leader_status: None, leader_down_since: None }], version: 0 })"#
-        );
+        let expected_table_route = TableRouteValue::Physical(PhysicalTableRouteValue {
+            region_routes: vec![
+                RegionRoute {
+                    region: Region {
+                        id: RegionId::new(0, 1),
+                        name: "r1".to_string(),
+                        partition: None,
+                        attrs: Default::default(),
+                    },
+                    leader_peer: Some(Peer {
+                        id: 2,
+                        addr: "a2".to_string(),
+                    }),
+                    follower_peers: vec![],
+                    leader_state: None,
+                    leader_down_since: None,
+                },
+                RegionRoute {
+                    region: Region {
+                        id: RegionId::new(0, 1),
+                        name: "r1".to_string(),
+                        partition: None,
+                        attrs: Default::default(),
+                    },
+                    leader_peer: Some(Peer {
+                        id: 2,
+                        addr: "a2".to_string(),
+                    }),
+                    follower_peers: vec![],
+                    leader_state: None,
+                    leader_down_since: None,
+                },
+            ],
+            version: 0,
+        });
+
+        assert_eq!(v, expected_table_route);
    }

    #[test]
--- a/src/common/meta/src/leadership_notifier.rs
+++ b/src/common/meta/src/leadership_notifier.rs
@@ -0,0 +1,183 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::{Arc, Mutex};
+
+use async_trait::async_trait;
+use common_telemetry::{error, info};
+
+use crate::error::Result;
+
+pub type LeadershipChangeNotifierCustomizerRef = Arc<dyn LeadershipChangeNotifierCustomizer>;
+
+/// A trait for customizing the leadership change notifier.
+pub trait LeadershipChangeNotifierCustomizer: Send + Sync {
+    /// Customizes the given `notifier` in place; what is changed is up to the implementor.
+    fn customize(&self, notifier: &mut LeadershipChangeNotifier);
+
+    /// Hands a `listener` to this customizer.
+    fn add_listener(&self, listener: Arc<dyn LeadershipChangeListener>);
+}
+
+/// A trait for handling leadership change events in a distributed system.
+///
+/// When a callback returns an error, [`LeadershipChangeNotifier`] logs it together with
+/// the listener name and carries on with the remaining listeners.
+#[async_trait]
+pub trait LeadershipChangeListener: Send + Sync {
+    /// Returns the listener name.
+    fn name(&self) -> &str;
+
+    /// Called when the node transitions to the leader role.
+    async fn on_leader_start(&self) -> Result<()>;
+
+    /// Called when the node transitions to the follower role.
+    async fn on_leader_stop(&self) -> Result<()>;
+}
+
+/// A notifier for leadership change events.
+#[derive(Default)]
+pub struct LeadershipChangeNotifier {
+    /// The registered listeners, kept (and notified) in the order they were added.
+    listeners: Vec<Arc<dyn LeadershipChangeListener>>,
+}
+
+/// A [`LeadershipChangeNotifierCustomizer`] that collects listeners and, when asked to
+/// customize a notifier, appends all collected listeners to it.
+#[derive(Default)]
+pub struct DefaultLeadershipChangeNotifierCustomizer {
+    /// Listeners collected so far; behind a mutex because `add_listener` takes `&self`.
+    listeners: Mutex<Vec<Arc<dyn LeadershipChangeListener>>>,
+}
+
+impl DefaultLeadershipChangeNotifierCustomizer {
+    /// Creates a customizer with no listeners.
+    pub fn new() -> Self {
+        Self::default()
+    }
+}
+
+impl LeadershipChangeNotifierCustomizer for DefaultLeadershipChangeNotifierCustomizer {
+    /// Appends every collected listener to `notifier`, preserving their order.
+    fn customize(&self, notifier: &mut LeadershipChangeNotifier) {
+        info!("Customizing leadership change notifier");
+        let collected = self.listeners.lock().unwrap();
+        notifier.listeners.extend(collected.iter().cloned());
+    }
+
+    /// Stores `listener` so that later `customize` calls pass it on to the notifier.
+    fn add_listener(&self, listener: Arc<dyn LeadershipChangeListener>) {
+        let mut collected = self.listeners.lock().unwrap();
+        collected.push(listener);
+    }
+}
+
+impl LeadershipChangeNotifier {
+    /// Adds a listener to the notifier.
+    ///
+    /// Listeners are notified in the order they were added.
+    pub fn add_listener(&mut self, listener: Arc<dyn LeadershipChangeListener>) {
+        self.listeners.push(listener);
+    }
+
+    /// Notify all listeners that the node has become a leader.
+    ///
+    /// Listeners are awaited one after another. A listener that fails is logged with
+    /// its name and does not stop the remaining listeners from being notified.
+    pub async fn notify_on_leader_start(&self) {
+        for listener in &self.listeners {
+            if let Err(err) = listener.on_leader_start().await {
+                error!(
+                    err;
+                    "Failed to notify listener: {}, event 'on_leader_start'",
+                    listener.name()
+                );
+            }
+        }
+    }
+
+    /// Notify all listeners that the node has become a follower.
+    ///
+    /// Listeners are awaited one after another. A listener that fails is logged with
+    /// its name and does not stop the remaining listeners from being notified.
+    pub async fn notify_on_leader_stop(&self) {
+        for listener in &self.listeners {
+            if let Err(err) = listener.on_leader_stop().await {
+                // Log the name of the callback that actually failed.
+                error!(
+                    err;
+                    "Failed to notify listener: {}, event: 'on_leader_stop'",
+                    listener.name()
+                );
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::atomic::{AtomicBool, Ordering};
+    use std::sync::Arc;
+
+    use super::*;
+
+    // A listener whose callbacks are optional closures; a missing closure means the
+    // corresponding event succeeds without doing anything.
+    struct MockListener {
+        name: String,
+        // Invoked from `on_leader_start`.
+        on_leader_start_fn: Option<Box<dyn Fn() -> Result<()> + Send + Sync>>,
+        // Invoked from `on_leader_stop`.
+        on_follower_start_fn: Option<Box<dyn Fn() -> Result<()> + Send + Sync>>,
+    }
+
+    #[async_trait::async_trait]
+    impl LeadershipChangeListener for MockListener {
+        fn name(&self) -> &str {
+            &self.name
+        }
+
+        async fn on_leader_start(&self) -> Result<()> {
+            if let Some(f) = &self.on_leader_start_fn {
+                return f();
+            }
+            Ok(())
+        }
+
+        async fn on_leader_stop(&self) -> Result<()> {
+            if let Some(f) = &self.on_follower_start_fn {
+                return f();
+            }
+            Ok(())
+        }
+    }
+
+    // Checks that listeners keep their insertion order and that each notification
+    // triggers only the matching callback.
+    #[tokio::test]
+    async fn test_leadership_change_notifier() {
+        let mut notifier = LeadershipChangeNotifier::default();
+        let listener1 = Arc::new(MockListener {
+            name: "listener1".to_string(),
+            on_leader_start_fn: None,
+            on_follower_start_fn: None,
+        });
+        // Flags flipped by listener2's callbacks; the `_moved` clones go into the closures.
+        let called_on_leader_start = Arc::new(AtomicBool::new(false));
+        let called_on_follower_start = Arc::new(AtomicBool::new(false));
+        let called_on_leader_start_moved = called_on_leader_start.clone();
+        let called_on_follower_start_moved = called_on_follower_start.clone();
+        let listener2 = Arc::new(MockListener {
+            name: "listener2".to_string(),
+            on_leader_start_fn: Some(Box::new(move || {
+                called_on_leader_start_moved.store(true, Ordering::Relaxed);
+                Ok(())
+            })),
+            on_follower_start_fn: Some(Box::new(move || {
+                called_on_follower_start_moved.store(true, Ordering::Relaxed);
+                Ok(())
+            })),
+        });
+
+        notifier.add_listener(listener1);
+        notifier.add_listener(listener2);
+
+        let listener1 = notifier.listeners.first().unwrap();
+        let listener2 = notifier.listeners.get(1).unwrap();
+
+        assert_eq!(listener1.name(), "listener1");
+        assert_eq!(listener2.name(), "listener2");
+
+        // After the start notification only the leader-start flag is set.
+        notifier.notify_on_leader_start().await;
+        assert!(!called_on_follower_start.load(Ordering::Relaxed));
+        assert!(called_on_leader_start.load(Ordering::Relaxed));
+
+        // After the stop notification both flags are set.
+        notifier.notify_on_leader_stop().await;
+        assert!(called_on_follower_start.load(Ordering::Relaxed));
+        assert!(called_on_leader_start.load(Ordering::Relaxed));
+    }
+}
--- a/src/common/meta/src/lib.rs
+++ b/src/common/meta/src/lib.rs
@@ -22,6 +22,7 @@
 pub mod cache;
 pub mod cache_invalidator;
 pub mod cluster;
+pub mod datanode;
 pub mod ddl;
 pub mod ddl_manager;
 pub mod distributed_time_constants;
@@ -31,6 +32,7 @@ pub mod heartbeat;
 pub mod instruction;
 pub mod key;
 pub mod kv_backend;
+pub mod leadership_notifier;
 pub mod lock_key;
 pub mod metrics;
 pub mod node_manager;
--- a/src/common/meta/src/region_keeper.rs
+++ b/src/common/meta/src/region_keeper.rs
@@ -58,7 +58,7 @@ impl MemoryRegionKeeper {
        Default::default()
    }

-    /// Returns [OpeningRegionGuard] if Region(`region_id`) on Peer(`datanode_id`) does not exist.
+    /// Returns [OperatingRegionGuard] if Region(`region_id`) on Peer(`datanode_id`) does not exist.
    pub fn register(
        &self,
        datanode_id: DatanodeId,
--- a/src/common/meta/src/rpc/procedure.rs
+++ b/src/common/meta/src/rpc/procedure.rs
@@ -16,10 +16,11 @@ use std::time::Duration;

 pub use api::v1::meta::{MigrateRegionResponse, ProcedureStateResponse};
 use api::v1::meta::{
-    ProcedureId as PbProcedureId, ProcedureStateResponse as PbProcedureStateResponse,
+    ProcedureDetailResponse as PbProcedureDetailResponse, ProcedureId as PbProcedureId,
+    ProcedureMeta as PbProcedureMeta, ProcedureStateResponse as PbProcedureStateResponse,
    ProcedureStatus as PbProcedureStatus,
 };
-use common_procedure::{ProcedureId, ProcedureState};
+use common_procedure::{ProcedureId, ProcedureInfo, ProcedureState};
 use snafu::ResultExt;

 use crate::error::{ParseProcedureIdSnafu, Result};
@@ -30,7 +31,7 @@ pub struct MigrateRegionRequest {
    pub region_id: u64,
    pub from_peer: u64,
    pub to_peer: u64,
-    pub replay_timeout: Duration,
+    pub timeout: Duration,
 }

 /// Cast the protobuf [`ProcedureId`] to common [`ProcedureId`].
@@ -49,9 +50,9 @@ pub fn pid_to_pb_pid(pid: ProcedureId) -> PbProcedureId {
    }
 }

-/// Cast the common [`ProcedureState`] to pb [`ProcedureStateResponse`].
-pub fn procedure_state_to_pb_response(state: &ProcedureState) -> PbProcedureStateResponse {
-    let (status, error) = match state {
+/// Cast the [`ProcedureState`] to a protobuf [`PbProcedureStatus`] paired with the state's error message (empty when the state carries no error).
+pub fn procedure_state_to_pb_state(state: &ProcedureState) -> (PbProcedureStatus, String) {
+    match state {
        ProcedureState::Running => (PbProcedureStatus::Running, String::default()),
        ProcedureState::Done { .. } => (PbProcedureStatus::Done, String::default()),
        ProcedureState::Retrying { error } => (PbProcedureStatus::Retrying, error.to_string()),
@@ -62,8 +63,12 @@ pub fn procedure_state_to_pb_response(state: &ProcedureState) -> PbProcedureStat
        ProcedureState::RollingBack { error } => {
            (PbProcedureStatus::RollingBack, error.to_string())
        }
-    };
+    }
+}

+/// Cast the common [`ProcedureState`] to pb [`ProcedureStateResponse`].
+pub fn procedure_state_to_pb_response(state: &ProcedureState) -> PbProcedureStateResponse {
+    let (status, error) = procedure_state_to_pb_state(state);
    PbProcedureStateResponse {
        status: status.into(),
        error,
@@ -71,6 +76,28 @@ pub fn procedure_state_to_pb_response(state: &ProcedureState) -> PbProcedureStat
    }
 }

+pub fn procedure_details_to_pb_response(metas: Vec<ProcedureInfo>) -> PbProcedureDetailResponse {
+    let procedures = metas
+        .into_iter()
+        .map(|meta| {
+            let (status, error) = procedure_state_to_pb_state(&meta.state);
+            PbProcedureMeta {
+                id: Some(pid_to_pb_pid(meta.id)),
+                type_name: meta.type_name.to_string(),
+                status: status.into(),
+                start_time_ms: meta.start_time_ms,
+                end_time_ms: meta.end_time_ms,
+                lock_keys: meta.lock_keys,
+                error,
+            }
+        })
+        .collect();
+    PbProcedureDetailResponse {
+        procedures,
+        ..Default::default()
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use std::sync::Arc;
--- a/src/common/meta/src/rpc/router.rs
+++ b/src/common/meta/src/rpc/router.rs
@@ -108,16 +108,16 @@ pub fn convert_to_region_peer_map(
        .collect::<HashMap<_, _>>()
 }

-/// Returns the HashMap<[RegionNumber], [RegionStatus]>;
-pub fn convert_to_region_leader_status_map(
+/// Returns the HashMap<[RegionNumber], [LeaderState]>;
+pub fn convert_to_region_leader_state_map(
    region_routes: &[RegionRoute],
-) -> HashMap<RegionNumber, RegionStatus> {
+) -> HashMap<RegionNumber, LeaderState> {
    region_routes
        .iter()
        .filter_map(|x| {
-            x.leader_status
+            x.leader_state
                .as_ref()
-                .map(|status| (x.region.id.region_number(), *status))
+                .map(|state| (x.region.id.region_number(), *state))
        })
        .collect::<HashMap<_, _>>()
 }
@@ -205,7 +205,7 @@ impl TableRoute {
                region,
                leader_peer,
                follower_peers,
-                leader_status: None,
+                leader_state: None,
                leader_down_since: None,
            });
        }
@@ -259,9 +259,13 @@ pub struct RegionRoute {
    pub follower_peers: Vec<Peer>,
    /// `None` by default.
    #[builder(setter(into, strip_option), default)]
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub leader_status: Option<RegionStatus>,
-    /// The start time when the leader is in `Downgraded` status.
+    #[serde(
+        default,
+        alias = "leader_status",
+        skip_serializing_if = "Option::is_none"
+    )]
+    pub leader_state: Option<LeaderState>,
+    /// The start time when the leader entered the `Downgrading` state.
    #[serde(default)]
    #[builder(default = "self.default_leader_down_since()")]
    pub leader_down_since: Option<i64>,
@@ -269,76 +273,79 @@ pub struct RegionRoute {

 impl RegionRouteBuilder {
    fn default_leader_down_since(&self) -> Option<i64> {
-        match self.leader_status {
-            Some(Some(RegionStatus::Downgraded)) => Some(current_time_millis()),
+        match self.leader_state {
+            Some(Some(LeaderState::Downgrading)) => Some(current_time_millis()),
            _ => None,
        }
    }
 }

-/// The Status of the [Region].
+/// The State of the [`Region`] Leader.
 /// TODO(dennis): It's better to add more fine-grained statuses such as `PENDING` etc.
 #[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, AsRefStr)]
 #[strum(serialize_all = "UPPERCASE")]
-pub enum RegionStatus {
-    /// The following cases in which the [Region] will be downgraded.
+pub enum LeaderState {
+    /// The following cases in which the [`Region`] will be downgraded.
    ///
-    /// - The [Region] is unavailable(e.g., Crashed, Network disconnected).
-    /// - The [Region] was planned to migrate to another [Peer].
-    Downgraded,
+    /// - The [`Region`] may be unavailable (e.g., Crashed, Network disconnected).
+    /// - The [`Region`] was planned to migrate to another [`Peer`].
+    #[serde(alias = "Downgraded")]
+    Downgrading,
 }

 impl RegionRoute {
-    /// Returns true if the Leader [Region] is downgraded.
+    /// Returns true if the Leader [`Region`] is downgrading.
    ///
-    /// The following cases in which the [Region] will be downgraded.
+    /// The following cases in which the [`Region`] will be downgraded.
    ///
-    /// - The [Region] is unavailable(e.g., Crashed, Network disconnected).
-    /// - The [Region] was planned to migrate to another [Peer].
+    /// - The [`Region`] is unavailable(e.g., Crashed, Network disconnected).
+    /// - The [`Region`] was planned to migrate to another [`Peer`].
    ///
-    pub fn is_leader_downgraded(&self) -> bool {
-        matches!(self.leader_status, Some(RegionStatus::Downgraded))
+    pub fn is_leader_downgrading(&self) -> bool {
+        matches!(self.leader_state, Some(LeaderState::Downgrading))
    }

-    /// Marks the Leader [Region] as downgraded.
+    /// Marks the Leader [`Region`] as [`LeaderState::Downgrading`].
    ///
-    /// We should downgrade a [Region] before deactivating it:
+    /// We should downgrade a [`Region`] before deactivating it:
    ///
-    /// - During the [Region] Failover Procedure.
-    /// - Migrating a [Region].
+    /// - During the [`Region`] Failover Procedure.
+    /// - Migrating a [`Region`].
    ///
-    /// **Notes:** Meta Server will stop renewing the lease for the downgraded [Region].
+    /// **Notes:** Meta Server will renew a special lease (`Downgrading`) for the downgrading [`Region`].
+    ///
+    /// A downgrading region will reject any write requests, and only allow the memtable to be flushed to object storage.
    ///
    pub fn downgrade_leader(&mut self) {
        self.leader_down_since = Some(current_time_millis());
-        self.leader_status = Some(RegionStatus::Downgraded)
+        self.leader_state = Some(LeaderState::Downgrading)
    }

-    /// Returns how long since the leader is in `Downgraded` status.
+    /// Returns how long the leader has been in the `Downgrading` state, in milliseconds.
    pub fn leader_down_millis(&self) -> Option<i64> {
        self.leader_down_since
            .map(|start| current_time_millis() - start)
    }

-    /// Sets the leader status.
+    /// Sets the leader state.
    ///
    /// Returns true if updated.
-    pub fn set_leader_status(&mut self, status: Option<RegionStatus>) -> bool {
-        let updated = self.leader_status != status;
+    pub fn set_leader_state(&mut self, state: Option<LeaderState>) -> bool {
+        let updated = self.leader_state != state;

-        match (status, updated) {
-            (Some(RegionStatus::Downgraded), true) => {
+        match (state, updated) {
+            (Some(LeaderState::Downgrading), true) => {
                self.leader_down_since = Some(current_time_millis());
            }
-            (Some(RegionStatus::Downgraded), false) => {
-                // Do nothing if leader is still in `Downgraded` status.
+            (Some(LeaderState::Downgrading), false) => {
+                // Do nothing if leader is still in `Downgrading` state.
            }
            _ => {
                self.leader_down_since = None;
            }
        }

-        self.leader_status = status;
+        self.leader_state = state;
        updated
    }
 }
@@ -477,15 +484,15 @@ mod tests {
            },
            leader_peer: Some(Peer::new(1, "a1")),
            follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
-            leader_status: None,
+            leader_state: None,
            leader_down_since: None,
        };

-        assert!(!region_route.is_leader_downgraded());
+        assert!(!region_route.is_leader_downgrading());

        region_route.downgrade_leader();

-        assert!(region_route.is_leader_downgraded());
+        assert!(region_route.is_leader_downgrading());
    }

    #[test]
@@ -499,7 +506,7 @@ mod tests {
            },
            leader_peer: Some(Peer::new(1, "a1")),
            follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
-            leader_status: None,
+            leader_state: None,
            leader_down_since: None,
        };

@@ -510,6 +517,73 @@ mod tests {
        assert_eq!(decoded, region_route);
    }

+    #[test]
+    fn test_region_route_compatibility() {
+        let region_route = RegionRoute {
+            region: Region {
+                id: 2.into(),
+                name: "r2".to_string(),
+                partition: None,
+                attrs: BTreeMap::new(),
+            },
+            leader_peer: Some(Peer::new(1, "a1")),
+            follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
+            leader_state: Some(LeaderState::Downgrading),
+            leader_down_since: None,
+        };
+        let input = r#"{"region":{"id":2,"name":"r2","partition":null,"attrs":{}},"leader_peer":{"id":1,"addr":"a1"},"follower_peers":[{"id":2,"addr":"a2"},{"id":3,"addr":"a3"}],"leader_state":"Downgraded","leader_down_since":null}"#;
+        let decoded: RegionRoute = serde_json::from_str(input).unwrap();
+        assert_eq!(decoded, region_route);
+
+        let region_route = RegionRoute {
+            region: Region {
+                id: 2.into(),
+                name: "r2".to_string(),
+                partition: None,
+                attrs: BTreeMap::new(),
+            },
+            leader_peer: Some(Peer::new(1, "a1")),
+            follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
+            leader_state: Some(LeaderState::Downgrading),
+            leader_down_since: None,
+        };
+        let input = r#"{"region":{"id":2,"name":"r2","partition":null,"attrs":{}},"leader_peer":{"id":1,"addr":"a1"},"follower_peers":[{"id":2,"addr":"a2"},{"id":3,"addr":"a3"}],"leader_status":"Downgraded","leader_down_since":null}"#;
+        let decoded: RegionRoute = serde_json::from_str(input).unwrap();
+        assert_eq!(decoded, region_route);
+
+        let region_route = RegionRoute {
+            region: Region {
+                id: 2.into(),
+                name: "r2".to_string(),
+                partition: None,
+                attrs: BTreeMap::new(),
+            },
+            leader_peer: Some(Peer::new(1, "a1")),
+            follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
+            leader_state: Some(LeaderState::Downgrading),
+            leader_down_since: None,
+        };
+        let input = r#"{"region":{"id":2,"name":"r2","partition":null,"attrs":{}},"leader_peer":{"id":1,"addr":"a1"},"follower_peers":[{"id":2,"addr":"a2"},{"id":3,"addr":"a3"}],"leader_state":"Downgrading","leader_down_since":null}"#;
+        let decoded: RegionRoute = serde_json::from_str(input).unwrap();
+        assert_eq!(decoded, region_route);
+
+        let region_route = RegionRoute {
+            region: Region {
+                id: 2.into(),
+                name: "r2".to_string(),
+                partition: None,
+                attrs: BTreeMap::new(),
+            },
+            leader_peer: Some(Peer::new(1, "a1")),
+            follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
+            leader_state: Some(LeaderState::Downgrading),
+            leader_down_since: None,
+        };
+        let input = r#"{"region":{"id":2,"name":"r2","partition":null,"attrs":{}},"leader_peer":{"id":1,"addr":"a1"},"follower_peers":[{"id":2,"addr":"a2"},{"id":3,"addr":"a3"}],"leader_status":"Downgrading","leader_down_since":null}"#;
+        let decoded: RegionRoute = serde_json::from_str(input).unwrap();
+        assert_eq!(decoded, region_route);
+    }
+
    #[test]
    fn test_de_serialize_partition() {
        let p = Partition {
--- a/src/common/meta/src/wal_options_allocator.rs
+++ b/src/common/meta/src/wal_options_allocator.rs
@@ -17,6 +17,7 @@ pub mod kafka;
 use std::collections::HashMap;
 use std::sync::Arc;

+use async_trait::async_trait;
 use common_wal::config::MetasrvWalConfig;
 use common_wal::options::{KafkaWalOptions, WalOptions, WAL_OPTIONS_KEY};
 use snafu::ResultExt;
@@ -24,6 +25,7 @@ use store_api::storage::{RegionId, RegionNumber};

 use crate::error::{EncodeWalOptionsSnafu, Result};
 use crate::kv_backend::KvBackendRef;
+use crate::leadership_notifier::LeadershipChangeListener;
 use crate::wal_options_allocator::kafka::topic_manager::TopicManager as KafkaTopicManager;

 /// Allocates wal options in region granularity.
@@ -94,6 +96,21 @@ impl WalOptionsAllocator {
    }
 }

+#[async_trait]
+impl LeadershipChangeListener for WalOptionsAllocator {
+    fn name(&self) -> &str {
+        "WalOptionsAllocator"
+    }
+
+    async fn on_leader_start(&self) -> Result<()> {
+        self.start().await
+    }
+
+    async fn on_leader_stop(&self) -> Result<()> {
+        Ok(())
+    }
+}
+
 /// Allocates a wal options for each region. The allocated wal options is encoded immediately.
 pub fn allocate_region_wal_options(
    regions: Vec<RegionNumber>,
--- a/src/common/pprof/Cargo.toml
+++ b/src/common/pprof/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "common-pprof"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+common-error.workspace = true
+common-macro.workspace = true
+prost.workspace = true
+snafu.workspace = true
+tokio.workspace = true
+
+[target.'cfg(unix)'.dependencies]
+pprof = { version = "0.13", features = [
+    "flamegraph",
+    "prost-codec",
+    "protobuf",
+] }
+
+[lints]
+workspace = true
--- a/src/common/pprof/src/lib.rs
+++ b/src/common/pprof/src/lib.rs
@@ -0,0 +1,99 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#[cfg(unix)]
+pub mod nix;
+
+pub mod error {
+    use std::any::Any;
+
+    use common_error::ext::ErrorExt;
+    use common_error::status_code::StatusCode;
+    use common_macro::stack_trace_debug;
+    use snafu::{Location, Snafu};
+
+    #[derive(Snafu)]
+    #[stack_trace_debug]
+    #[snafu(visibility(pub(crate)))]
+    pub enum Error {
+        #[cfg(unix)]
+        #[snafu(display("Pprof error"))]
+        Pprof {
+            #[snafu(source)]
+            error: pprof::Error,
+            #[snafu(implicit)]
+            location: Location,
+        },
+
+        #[snafu(display("Pprof is unsupported on this platform"))]
+        Unsupported {
+            #[snafu(implicit)]
+            location: Location,
+        },
+    }
+
+    pub type Result<T> = std::result::Result<T, Error>;
+
+    impl ErrorExt for Error {
+        fn status_code(&self) -> StatusCode {
+            match self {
+                #[cfg(unix)]
+                Error::Pprof { .. } => StatusCode::Unexpected,
+                Error::Unsupported { .. } => StatusCode::Unsupported,
+            }
+        }
+
+        fn as_any(&self) -> &dyn Any {
+            self
+        }
+    }
+}
+
+#[cfg(not(unix))]
+pub mod dummy {
+    use std::time::Duration;
+
+    use crate::error::{Result, UnsupportedSnafu};
+
+    /// Dummy CPU profiler utility.
+    #[derive(Debug)]
+    pub struct Profiling {}
+
+    impl Profiling {
+        /// Creates a new profiler.
+        pub fn new(_duration: Duration, _frequency: i32) -> Profiling {
+            Profiling {}
+        }
+
+        /// Profiles and returns a generated text.
+        pub async fn dump_text(&self) -> Result<String> {
+            UnsupportedSnafu {}.fail()
+        }
+
+        /// Profiles and returns a generated flamegraph.
+        pub async fn dump_flamegraph(&self) -> Result<Vec<u8>> {
+            UnsupportedSnafu {}.fail()
+        }
+
+        /// Profiles and returns a generated proto.
+        pub async fn dump_proto(&self) -> Result<Vec<u8>> {
+            UnsupportedSnafu {}.fail()
+        }
+    }
+}
+
+#[cfg(not(unix))]
+pub use dummy::Profiling;
+#[cfg(unix)]
+pub use nix::Profiling;
--- a/src/common/pprof/src/nix.rs
+++ b/src/common/pprof/src/nix.rs
@@ -0,0 +1,78 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::time::Duration;
+
+use pprof::protos::Message;
+use snafu::ResultExt;
+
+use crate::error::{PprofSnafu, Result};
+
+/// CPU profiler utility.
+// Inspired by https://github.com/datafuselabs/databend/blob/67f445e83cd4eceda98f6c1c114858929d564029/src/common/base/src/base/profiling.rs
+#[derive(Debug)]
+pub struct Profiling {
+    /// Sample duration.
+    duration: Duration,
+    /// Sample frequency.
+    frequency: i32,
+}
+
+impl Profiling {
+    /// Creates a new profiler.
+    pub fn new(duration: Duration, frequency: i32) -> Profiling {
+        Profiling {
+            duration,
+            frequency,
+        }
+    }
+
+    /// Profiles and returns a generated pprof report.
+    pub async fn report(&self) -> Result<pprof::Report> {
+        let guard = pprof::ProfilerGuardBuilder::default()
+            .frequency(self.frequency)
+            .blocklist(&["libc", "libgcc", "pthread", "vdso"])
+            .build()
+            .context(PprofSnafu)?;
+        tokio::time::sleep(self.duration).await;
+        guard.report().build().context(PprofSnafu)
+    }
+
+    /// Profiles and returns a generated text.
+    pub async fn dump_text(&self) -> Result<String> {
+        let report = self.report().await?;
+        let text = format!("{report:?}");
+        Ok(text)
+    }
+
+    /// Profiles and returns a generated flamegraph.
+    pub async fn dump_flamegraph(&self) -> Result<Vec<u8>> {
+        let mut body: Vec<u8> = Vec::new();
+
+        let report = self.report().await?;
+        report.flamegraph(&mut body).context(PprofSnafu)?;
+
+        Ok(body)
+    }
+
+    /// Profiles and returns a generated proto.
+    pub async fn dump_proto(&self) -> Result<Vec<u8>> {
+        let report = self.report().await?;
+        // Generate google’s pprof format report.
+        let profile = report.pprof().context(PprofSnafu)?;
+        let body = profile.encode_to_vec();
+
+        Ok(body)
+    }
+}
--- a/src/common/procedure/Cargo.toml
+++ b/src/common/procedure/Cargo.toml
@@ -19,6 +19,7 @@ common-error.workspace = true
 common-macro.workspace = true
 common-runtime.workspace = true
 common-telemetry.workspace = true
+common-time.workspace = true
 futures.workspace = true
 humantime-serde.workspace = true
 object-store.workspace = true
--- a/src/common/procedure/src/lib.rs
+++ b/src/common/procedure/src/lib.rs
@@ -26,7 +26,7 @@ pub mod watcher;
 pub use crate::error::{Error, Result};
 pub use crate::procedure::{
    BoxedProcedure, BoxedProcedureLoader, Context, ContextProvider, LockKey, Output, ParseIdError,
-    Procedure, ProcedureId, ProcedureManager, ProcedureManagerRef, ProcedureState, ProcedureWithId,
-    Status, StringKey,
+    Procedure, ProcedureId, ProcedureInfo, ProcedureManager, ProcedureManagerRef, ProcedureState,
+    ProcedureWithId, Status, StringKey,
 };
 pub use crate::watcher::Watcher;
--- a/src/common/procedure/src/local.rs
+++ b/src/common/procedure/src/local.rs
@@ -16,7 +16,7 @@ mod runner;
 mod rwlock;

 use std::collections::{HashMap, VecDeque};
-use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::atomic::{AtomicBool, AtomicI64, Ordering};
 use std::sync::{Arc, Mutex, RwLock};
 use std::time::{Duration, Instant};

@@ -35,7 +35,7 @@ use crate::error::{
    StartRemoveOutdatedMetaTaskSnafu, StopRemoveOutdatedMetaTaskSnafu,
 };
 use crate::local::runner::Runner;
-use crate::procedure::{BoxedProcedureLoader, InitProcedureState};
+use crate::procedure::{BoxedProcedureLoader, InitProcedureState, ProcedureInfo};
 use crate::store::{ProcedureMessage, ProcedureMessages, ProcedureStore, StateStoreRef};
 use crate::{
    BoxedProcedure, ContextProvider, LockKey, ProcedureId, ProcedureManager, ProcedureState,
@@ -57,6 +57,8 @@ const META_TTL: Duration = Duration::from_secs(60 * 10);
 pub(crate) struct ProcedureMeta {
    /// Id of this procedure.
    id: ProcedureId,
+    /// Type name of this procedure.
+    type_name: String,
    /// Parent procedure id.
    parent_id: Option<ProcedureId>,
    /// Notify to wait for subprocedures.
@@ -69,6 +71,10 @@ pub(crate) struct ProcedureMeta {
    state_receiver: Receiver<ProcedureState>,
    /// Id of child procedures.
    children: Mutex<Vec<ProcedureId>>,
+    /// Start execution time of this procedure.
+    start_time_ms: AtomicI64,
+    /// End execution time of this procedure.
+    end_time_ms: AtomicI64,
 }

 impl ProcedureMeta {
@@ -77,6 +83,7 @@ impl ProcedureMeta {
        procedure_state: ProcedureState,
        parent_id: Option<ProcedureId>,
        lock_key: LockKey,
+        type_name: &str,
    ) -> ProcedureMeta {
        let (state_sender, state_receiver) = watch::channel(procedure_state);
        ProcedureMeta {
@@ -87,6 +94,9 @@ impl ProcedureMeta {
            state_sender,
            state_receiver,
            children: Mutex::new(Vec::new()),
+            start_time_ms: AtomicI64::new(0),
+            end_time_ms: AtomicI64::new(0),
+            type_name: type_name.to_string(),
        }
    }

@@ -117,6 +127,18 @@ impl ProcedureMeta {
    fn num_children(&self) -> usize {
        self.children.lock().unwrap().len()
    }
+
+    /// Updates the start time of the procedure to the current time in milliseconds.
+    fn set_start_time_ms(&self) {
+        self.start_time_ms
+            .store(common_time::util::current_time_millis(), Ordering::Relaxed);
+    }
+
+    /// Updates the end time of the procedure to the current time in milliseconds.
+    fn set_end_time_ms(&self) {
+        self.end_time_ms
+            .store(common_time::util::current_time_millis(), Ordering::Relaxed);
+    }
 }

 /// Reference counted pointer to [ProcedureMeta].
@@ -210,6 +232,22 @@ impl ManagerContext {
        procedures.get(&procedure_id).map(|meta| meta.state())
    }

+    /// Returns the [ProcedureInfo] of all procedures.
+    fn list_procedure(&self) -> Vec<ProcedureInfo> {
+        let procedures = self.procedures.read().unwrap();
+        procedures
+            .values()
+            .map(|meta| ProcedureInfo {
+                id: meta.id,
+                type_name: meta.type_name.clone(),
+                start_time_ms: meta.start_time_ms.load(Ordering::Relaxed),
+                end_time_ms: meta.end_time_ms.load(Ordering::Relaxed),
+                state: meta.state(),
+                lock_keys: meta.lock_key.get_keys(),
+            })
+            .collect()
+    }
+
    /// Returns the [Watcher] of specific `procedure_id`.
    fn watcher(&self, procedure_id: ProcedureId) -> Option<Watcher> {
        let procedures = self.procedures.read().unwrap();
@@ -438,6 +476,7 @@ impl LocalManager {
            procedure_state,
            None,
            procedure.lock_key(),
+            procedure.type_name(),
        ));
        let runner = Runner {
            meta: meta.clone(),
@@ -641,6 +680,10 @@ impl ProcedureManager for LocalManager {
    fn procedure_watcher(&self, procedure_id: ProcedureId) -> Option<Watcher> {
        self.manager_ctx.watcher(procedure_id)
    }
+
+    async fn list_procedures(&self) -> Result<Vec<ProcedureInfo>> {
+        Ok(self.manager_ctx.list_procedure())
+    }
 }

 struct RemoveOutdatedMetaFunction {
@@ -675,6 +718,7 @@ pub(crate) mod test_util {
            ProcedureState::Running,
            None,
            LockKey::default(),
+            "ProcedureAdapter",
        )
    }

--- a/src/common/procedure/src/local/runner.rs
+++ b/src/common/procedure/src/local/runner.rs
@@ -27,7 +27,9 @@ use crate::error::{self, ProcedurePanicSnafu, Result, RollbackTimesExceededSnafu
 use crate::local::{ManagerContext, ProcedureMeta, ProcedureMetaRef};
 use crate::procedure::{Output, StringKey};
 use crate::store::{ProcedureMessage, ProcedureStore};
-use crate::{BoxedProcedure, Context, Error, ProcedureId, ProcedureState, ProcedureWithId, Status};
+use crate::{
+    BoxedProcedure, Context, Error, Procedure, ProcedureId, ProcedureState, ProcedureWithId, Status,
+};

 /// A guard to cleanup procedure state.
 struct ProcedureGuard {
@@ -129,7 +131,9 @@ impl Runner {

        // Execute the procedure. We need to release the lock whenever the execution
        // is successful or fail.
+        self.meta.set_start_time_ms();
        self.execute_procedure_in_loop().await;
+        self.meta.set_end_time_ms();

        // We can't remove the metadata of the procedure now as users and its parent might
        // need to query its state.
@@ -368,6 +372,7 @@ impl Runner {
            procedure_state,
            Some(self.meta.id),
            procedure.lock_key(),
+            procedure.type_name(),
        ));
        let runner = Runner {
            meta: meta.clone(),
--- a/src/common/procedure/src/procedure.rs
+++ b/src/common/procedure/src/procedure.rs
@@ -159,6 +159,14 @@ impl<T: Procedure + ?Sized> Procedure for Box<T> {
        (**self).execute(ctx).await
    }

+    async fn rollback(&mut self, ctx: &Context) -> Result<()> {
+        (**self).rollback(ctx).await
+    }
+
+    fn rollback_supported(&self) -> bool {
+        (**self).rollback_supported()
+    }
+
    fn dump(&self) -> Result<String> {
        (**self).dump()
    }
@@ -227,6 +235,11 @@ impl LockKey {
    pub fn keys_to_lock(&self) -> impl Iterator<Item = &StringKey> {
        self.0.iter()
    }
+
+    /// Returns the keys to lock, each rendered via its `Debug` representation.
+    pub fn get_keys(&self) -> Vec<String> {
+        self.0.iter().map(|key| format!("{:?}", key)).collect()
+    }
 }

 /// Boxed [Procedure].
@@ -374,6 +387,18 @@ impl ProcedureState {
            _ => None,
        }
    }
+
+    /// Returns the variant name of this state as a string.
+    pub fn as_str_name(&self) -> &str {
+        match self {
+            ProcedureState::Running => "Running",
+            ProcedureState::Done { .. } => "Done",
+            ProcedureState::Retrying { .. } => "Retrying",
+            ProcedureState::Failed { .. } => "Failed",
+            ProcedureState::PrepareRollback { .. } => "PrepareRollback",
+            ProcedureState::RollingBack { .. } => "RollingBack",
+        }
+    }
 }

 /// The initial procedure state.
@@ -412,11 +437,30 @@ pub trait ProcedureManager: Send + Sync + 'static {

    /// Returns a [Watcher] to watch [ProcedureState] of specific procedure.
    fn procedure_watcher(&self, procedure_id: ProcedureId) -> Option<Watcher>;
+
+    /// Returns the details of all procedures.
+    async fn list_procedures(&self) -> Result<Vec<ProcedureInfo>>;
 }

 /// Ref-counted pointer to the [ProcedureManager].
 pub type ProcedureManagerRef = Arc<dyn ProcedureManager>;

+#[derive(Debug, Clone)]
+pub struct ProcedureInfo {
+    /// Id of this procedure.
+    pub id: ProcedureId,
+    /// Type name of this procedure.
+    pub type_name: String,
+    /// Start execution time of this procedure.
+    pub start_time_ms: i64,
+    /// End execution time of this procedure.
+    pub end_time_ms: i64,
+    /// State of this procedure.
+    pub state: ProcedureState,
+    /// Lock keys of this procedure.
+    pub lock_keys: Vec<String>,
+}
+
 #[cfg(test)]
 mod tests {
    use common_error::mock::MockError;
--- a/src/common/recordbatch/src/adapter.rs
+++ b/src/common/recordbatch/src/adapter.rs
@@ -329,6 +329,7 @@ impl ExecutionPlanVisitor for MetricCollector {
                level: self.current_level,
                metrics: vec![],
            });
+            self.current_level += 1;
            return Ok(true);
        };

@@ -365,8 +366,7 @@ impl ExecutionPlanVisitor for MetricCollector {
    }

    fn post_visit(&mut self, _plan: &dyn ExecutionPlan) -> std::result::Result<bool, Self::Error> {
-        // the last minus will underflow
-        self.current_level = self.current_level.wrapping_sub(1);
+        self.current_level -= 1;
        Ok(true)
    }
 }
--- a/src/common/telemetry/Cargo.toml
+++ b/src/common/telemetry/Cargo.toml
@@ -17,6 +17,7 @@ backtrace = "0.3"
 common-error.workspace = true
 console-subscriber = { version = "0.1", optional = true }
 greptime-proto.workspace = true
+humantime-serde.workspace = true
 lazy_static.workspace = true
 once_cell.workspace = true
 opentelemetry = { version = "0.21.0", default-features = false, features = [
--- a/src/common/telemetry/src/logging.rs
+++ b/src/common/telemetry/src/logging.rs
@@ -15,6 +15,7 @@
 //! logging stuffs, inspired by databend
 use std::env;
 use std::sync::{Arc, Mutex, Once};
+use std::time::Duration;

 use once_cell::sync::{Lazy, OnceCell};
 use opentelemetry::{global, KeyValue};
@@ -26,7 +27,7 @@ use serde::{Deserialize, Serialize};
 use tracing_appender::non_blocking::WorkerGuard;
 use tracing_appender::rolling::{RollingFileAppender, Rotation};
 use tracing_log::LogTracer;
-use tracing_subscriber::filter::Targets;
+use tracing_subscriber::filter::{FilterFn, Targets};
 use tracing_subscriber::fmt::Layer;
 use tracing_subscriber::layer::SubscriberExt;
 use tracing_subscriber::prelude::*;
@@ -53,6 +54,9 @@ pub struct LoggingOptions {
    /// The log format that can be one of "json" or "text". Default is "text".
    pub log_format: LogFormat,

+    /// The maximum number of log files to keep. Default is 720.
+    pub max_log_files: usize,
+
    /// Whether to append logs to stdout. Default is true.
    pub append_stdout: bool,

@@ -64,6 +68,24 @@ pub struct LoggingOptions {

    /// The tracing sample ratio.
    pub tracing_sample_ratio: Option<TracingSampleOptions>,
+
+    /// The logging options of slow query.
+    pub slow_query: SlowQueryOptions,
+}
+
+/// The options of slow query logging.
+#[derive(Clone, Debug, Serialize, Deserialize, Default)]
+#[serde(default)]
+pub struct SlowQueryOptions {
+    /// Whether to enable slow query log. Defaults to `false` via the derived `Default`.
+    pub enable: bool,
+
+    /// The threshold of slow queries, (de)serialized in humantime format through `humantime_serde`. Defaults to `None`.
+    #[serde(with = "humantime_serde")]
+    pub threshold: Option<Duration>,
+
+    /// The sample ratio of slow queries. Defaults to `None`.
+    pub sample_ratio: Option<f64>,
 }

 #[derive(Clone, Debug, Copy, PartialEq, Eq, Serialize, Deserialize)]
@@ -96,6 +118,9 @@ impl Default for LoggingOptions {
            otlp_endpoint: None,
            tracing_sample_ratio: None,
            append_stdout: true,
+            slow_query: SlowQueryOptions::default(),
+            // With hourly rotation (24 files per day), 720 files retain 30 days of logs.
+            max_log_files: 720,
        }
    }
 }
@@ -186,8 +211,17 @@ pub fn init_global_logging(

        // Configure the file logging layer with rolling policy.
        let file_logging_layer = if !opts.dir.is_empty() {
-            let rolling_appender =
-                RollingFileAppender::new(Rotation::HOURLY, &opts.dir, "greptimedb");
+            let rolling_appender = RollingFileAppender::builder()
+                .rotation(Rotation::HOURLY)
+                .filename_prefix("greptimedb")
+                .max_log_files(opts.max_log_files)
+                .build(&opts.dir)
+                .unwrap_or_else(|e| {
+                    panic!(
+                        "initializing rolling file appender at {} failed: {}",
+                        &opts.dir, e
+                    )
+                });
            let (writer, guard) = tracing_appender::non_blocking(rolling_appender);
            guards.push(guard);

@@ -208,8 +242,17 @@ pub fn init_global_logging(

        // Configure the error file logging layer with rolling policy.
        let err_file_logging_layer = if !opts.dir.is_empty() {
-            let rolling_appender =
-                RollingFileAppender::new(Rotation::HOURLY, &opts.dir, "greptimedb-err");
+            let rolling_appender = RollingFileAppender::builder()
+                .rotation(Rotation::HOURLY)
+                .filename_prefix("greptimedb-err")
+                .max_log_files(opts.max_log_files)
+                .build(&opts.dir)
+                .unwrap_or_else(|e| {
+                    panic!(
+                        "initializing rolling file appender at {} failed: {}",
+                        &opts.dir, e
+                    )
+                });
            let (writer, guard) = tracing_appender::non_blocking(rolling_appender);
            guards.push(guard);

@@ -235,6 +278,51 @@ pub fn init_global_logging(
            None
        };

+        let slow_query_logging_layer = if !opts.dir.is_empty() && opts.slow_query.enable {
+            let rolling_appender = RollingFileAppender::builder()
+                .rotation(Rotation::HOURLY)
+                .filename_prefix("greptimedb-slow-queries")
+                .max_log_files(opts.max_log_files)
+                .build(&opts.dir)
+                .unwrap_or_else(|e| {
+                    panic!(
+                        "initializing rolling file appender at {} failed: {}",
+                        &opts.dir, e
+                    )
+                });
+            let (writer, guard) = tracing_appender::non_blocking(rolling_appender);
+            guards.push(guard);
+
+            // Only logs spans/events that declare a field whose name contains "slow".
+            let slow_query_filter = FilterFn::new(|metadata| {
+                metadata
+                    .fields()
+                    .iter()
+                    .any(|field| field.name().contains("slow"))
+            });
+
+            if opts.log_format == LogFormat::Json {
+                Some(
+                    Layer::new()
+                        .json()
+                        .with_writer(writer)
+                        .with_ansi(false)
+                        .with_filter(slow_query_filter)
+                        .boxed(),
+                )
+            } else {
+                Some(
+                    Layer::new()
+                        .with_writer(writer)
+                        .with_ansi(false)
+                        .with_filter(slow_query_filter)
+                        .boxed(),
+                )
+            }
+        } else {
+            None
+        };
+
        // resolve log level settings from:
        // - options from command line or config files
        // - environment variable: RUST_LOG
@@ -279,6 +367,7 @@ pub fn init_global_logging(
                .with(stdout_logging_layer)
                .with(file_logging_layer)
                .with(err_file_logging_layer)
+                .with(slow_query_logging_layer)
        };

        // consume the `tracing_opts` to avoid "unused" warnings.
@@ -289,7 +378,8 @@ pub fn init_global_logging(
            .with(dyn_filter)
            .with(stdout_logging_layer)
            .with(file_logging_layer)
-            .with(err_file_logging_layer);
+            .with(err_file_logging_layer)
+            .with(slow_query_logging_layer);

        if opts.enable_otlp_tracing {
            global::set_text_map_propagator(TraceContextPropagator::new());
--- a/Show More
+++ b/Show More