Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2025-12-25 07:30:02 +00:00)

Compare commits: v0.7.2...flow_p3_re (95 commits)
| SHA1 |
|---|
| f24cca83a1 |
| 533ae3378f |
| 843ef050d6 |
| 96b2871750 |
| ea117e1b4c |
| e8eb3f6756 |
| cce40f6a48 |
| 874d756dba |
| bba3108e0d |
| 9524ec83bc |
| e0b5f52c2a |
| 1272bc9afc |
| df01ac05a1 |
| 659d34a170 |
| 62037ee4c8 |
| 8d229dda98 |
| 42e7403fcc |
| 20a933e395 |
| b619950c70 |
| 4685b59ef1 |
| 86a989517e |
| 0aaf7621bd |
| 924c52af7c |
| f5e5a89e44 |
| 778e195f07 |
| f764fd5847 |
| 19a9035f4b |
| 96c01a3bf0 |
| bf21527f18 |
| 9e1441e48b |
| eeb4e26c71 |
| 7ca0fa52d4 |
| 443722597b |
| d4b814f698 |
| d0b2a11f2b |
| 54432df92f |
| 8f2ce4abe8 |
| d077892e1c |
| cfed466fcd |
| 0c5f4801b7 |
| 2114b153e7 |
| 314f2704d4 |
| 510782261d |
| 20e8c3d864 |
| 2a2a44883f |
| 4248dfcf36 |
| 64945533dd |
| ffc8074556 |
| 7e56bf250b |
| 50ae4dc174 |
| 16aef70089 |
| 786f43da91 |
| 3e9bda3267 |
| 89d58538c7 |
| d12379106e |
| 64941d848e |
| 96a40e0300 |
| d2e081c1f9 |
| cdbdb04d93 |
| 5af87baeb0 |
| d5a948a0a6 |
| bbea651d08 |
| 8060c81e1d |
| e6507aaf34 |
| 87795248dd |
| 7a04bfe50a |
| 2f4726f7b5 |
| 75d85f9915 |
| db329f6c80 |
| 544c4a70f8 |
| 02f806fba9 |
| 9459ace33e |
| c1e005b148 |
| c00c1d95ee |
| 5d739932c0 |
| aab7367804 |
| 34f935df66 |
| fda1523ced |
| 2c0c7759ee |
| 2398918adf |
| 50bea2f107 |
| 1629435888 |
| b3c94a303b |
| 883b7fce96 |
| ea9367f371 |
| 2896e1f868 |
| 183fccbbd6 |
| b51089fa61 |
| 682b04cbe4 |
| e1d2f9a596 |
| 2fca45b048 |
| 3e1a125732 |
| 34b1427a82 |
| 28fd0dc276 |
| 32b9639d7c |
.github/CODEOWNERS (vendored, new file, 27 lines)
@@ -0,0 +1,27 @@
# GreptimeDB CODEOWNERS

# These owners will be the default owners for everything in the repo.

* @GreptimeTeam/db-approver

## [Module] Database Engine
/src/index @zhongzc
/src/mito2 @evenyag @v0y4g3r @waynexia
/src/query @evenyag

## [Module] Distributed
/src/common/meta @MichaelScofield
/src/common/procedure @MichaelScofield
/src/meta-client @MichaelScofield
/src/meta-srv @MichaelScofield

## [Module] Write Ahead Log
/src/log-store @v0y4g3r
/src/store-api @v0y4g3r

## [Module] Metrics Engine
/src/metric-engine @waynexia
/src/promql @waynexia

## [Module] Flow
/src/flow @zhongzc @waynexia

.github/ISSUE_TEMPLATE/bug_report.yml (vendored, 2 changes)
@@ -39,7 +39,7 @@ body:
- Query Engine
- Table Engine
- Write Protocols
- MetaSrv
- Metasrv
- Frontend
- Datanode
- Other

@@ -22,15 +22,15 @@ inputs:
build-dev-builder-ubuntu:
description: Build dev-builder-ubuntu image
required: false
default: 'true'
default: "true"
build-dev-builder-centos:
description: Build dev-builder-centos image
required: false
default: 'true'
default: "true"
build-dev-builder-android:
description: Build dev-builder-android image
required: false
default: 'true'
default: "true"
runs:
using: composite
steps:
@@ -47,7 +47,7 @@ runs:
run: |
make dev-builder \
BASE_IMAGE=ubuntu \
BUILDX_MULTI_PLATFORM_BUILD=true \
BUILDX_MULTI_PLATFORM_BUILD=all \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
@@ -58,7 +58,7 @@ runs:
run: |
make dev-builder \
BASE_IMAGE=centos \
BUILDX_MULTI_PLATFORM_BUILD=true \
BUILDX_MULTI_PLATFORM_BUILD=amd64 \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
@@ -72,5 +72,5 @@ runs:
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }} && \

docker push ${{ inputs.dockerhub-image-registry }}/${{ inputs.dockerhub-image-namespace }}/dev-builder-android:${{ inputs.version }}

@@ -16,7 +16,7 @@ inputs:
dev-mode:
description: Enable dev mode, only build standard greptime
required: false
default: 'false'
default: "false"
working-dir:
description: Working directory to build the artifacts
required: false
@@ -68,7 +68,7 @@ runs:

- name: Build greptime on centos base image
uses: ./.github/actions/build-greptime-binary
if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Only build centos7 base image for amd64.
if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Builds greptime for centos if the host machine is amd64.
with:
base-image: centos
features: servers/dashboard
@@ -79,7 +79,7 @@ runs:

- name: Build greptime on android base image
uses: ./.github/actions/build-greptime-binary
if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Only build android base image on amd64.
if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Builds arm64 greptime binary for android if the host machine is amd64.
with:
base-image: android
artifacts-dir: greptime-android-arm64-${{ inputs.version }}

.github/workflows/apidoc.yml (vendored, 2 changes)
@@ -13,7 +13,7 @@ on:
name: Build API docs

env:
RUST_TOOLCHAIN: nightly-2023-12-19
RUST_TOOLCHAIN: nightly-2024-04-18

jobs:
apidoc:

.github/workflows/develop.yml (vendored, 72 changes)
@@ -30,15 +30,20 @@ concurrency:
cancel-in-progress: true

env:
RUST_TOOLCHAIN: nightly-2023-12-19
RUST_TOOLCHAIN: nightly-2024-04-18

jobs:
typos:
name: Spell Check with Typos
check-typos-and-docs:
name: Check typos and docs
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
- uses: crate-ci/typos@v1.13.10
- name: Check the config docs
run: |
make config-docs && \
git diff --name-only --exit-code ./config/config.md \
|| (echo "'config/config.md' is not up-to-date, please run 'make config-docs'." && exit 1)

check:
name: Check
@@ -93,6 +98,8 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
@@ -123,10 +130,12 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
target: [ "fuzz_create_table", "fuzz_alter_table" ]
target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database" ]
steps:
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
@@ -138,8 +147,9 @@ jobs:
- name: Set Rust Fuzz
shell: bash
run: |
sudo apt update && sudo apt install -y libfuzzer-14-dev
cargo install cargo-fuzz
sudo apt-get install -y libfuzzer-14-dev
rustup install nightly
cargo +nightly install cargo-fuzz
- name: Download pre-built binaries
uses: actions/download-artifact@v4
with:
@@ -175,13 +185,13 @@ jobs:
- name: Unzip binaries
run: tar -xvf ./bins.tar.gz
- name: Run sqlness
run: RUST_BACKTRACE=1 ./bins/sqlness-runner -c ./tests/cases --bins-dir ./bins
run: RUST_BACKTRACE=1 ./bins/sqlness-runner -c ./tests/cases --bins-dir ./bins --preserve-state
- name: Upload sqlness logs
if: always()
uses: actions/upload-artifact@v4
with:
name: sqlness-logs
path: /tmp/greptime-*.log
path: /tmp/sqlness-*
retention-days: 3

sqlness-kafka-wal:
@@ -205,13 +215,13 @@ jobs:
working-directory: tests-integration/fixtures/kafka
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Run sqlness
run: RUST_BACKTRACE=1 ./bins/sqlness-runner -w kafka -k 127.0.0.1:9092 -c ./tests/cases --bins-dir ./bins
run: RUST_BACKTRACE=1 ./bins/sqlness-runner -w kafka -k 127.0.0.1:9092 -c ./tests/cases --bins-dir ./bins --preserve-state
- name: Upload sqlness logs
if: always()
uses: actions/upload-artifact@v4
with:
name: sqlness-logs-with-kafka-wal
path: /tmp/greptime-*.log
path: /tmp/sqlness-*
retention-days: 3

fmt:
@@ -305,10 +315,10 @@ jobs:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
GT_S3_REGION: ${{ secrets.S3_REGION }}
GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}
GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
UNITTEST_LOG_DIR: "__unittest_logs"
@@ -321,20 +331,20 @@ jobs:
fail_ci_if_error: false
verbose: true

compat:
name: Compatibility Test
needs: build
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- name: Download pre-built binaries
uses: actions/download-artifact@v4
with:
name: bins
path: .
- name: Unzip binaries
run: |
mkdir -p ./bins/current
tar -xvf ./bins.tar.gz --strip-components=1 -C ./bins/current
- run: ./tests/compat/test-compat.sh 0.6.0
# compat:
# name: Compatibility Test
# needs: build
# runs-on: ubuntu-20.04
# timeout-minutes: 60
# steps:
# - uses: actions/checkout@v4
# - name: Download pre-built binaries
# uses: actions/download-artifact@v4
# with:
# name: bins
# path: .
# - name: Unzip binaries
# run: |
# mkdir -p ./bins/current
# tar -xvf ./bins.tar.gz --strip-components=1 -C ./bins/current
# - run: ./tests/compat/test-compat.sh 0.6.0

.github/workflows/nightly-ci.yml (vendored, 10 changes)
@@ -12,7 +12,7 @@ concurrency:
cancel-in-progress: true

env:
RUST_TOOLCHAIN: nightly-2023-12-19
RUST_TOOLCHAIN: nightly-2024-04-18

jobs:
sqlness:
@@ -85,10 +85,10 @@ jobs:
env:
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
GT_S3_REGION: ${{ secrets.S3_REGION }}
GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}
GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
UNITTEST_LOG_DIR: "__unittest_logs"
- name: Notify slack if failed
if: failure()

.github/workflows/release.yml (vendored, 2 changes)
@@ -82,7 +82,7 @@ on:
# Use env variables to control all the release process.
env:
# The arguments of building greptime.
RUST_TOOLCHAIN: nightly-2023-12-19
RUST_TOOLCHAIN: nightly-2024-04-18
CARGO_PROFILE: nightly

# Controls whether to run tests, include unit-test, integration-test and sqlness.

@@ -50,7 +50,7 @@ GreptimeDB uses the [Apache 2.0 license](https://github.com/GreptimeTeam/greptim

- To ensure that community is free and confident in its ability to use your contributions, please sign the Contributor License Agreement (CLA) which will be incorporated in the pull request process.
- Make sure all files have proper license header (running `docker run --rm -v $(pwd):/github/workspace ghcr.io/korandoru/hawkeye-native:v3 format` from the project root).
- Make sure all your codes are formatted and follow the [coding style](https://pingcap.github.io/style-guide/rust/).
- Make sure all your codes are formatted and follow the [coding style](https://pingcap.github.io/style-guide/rust/) and [style guide](http://github.com/greptimeTeam/docs/style-guide.md).
- Make sure all unit tests are passed (using `cargo test --workspace` or [nextest](https://nexte.st/index.html) `cargo nextest run`).
- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings`).

Cargo.lock (generated, 2554 changes)
File diff suppressed because it is too large.

Cargo.toml (54 changes)
@@ -70,16 +70,24 @@ license = "Apache-2.0"
clippy.print_stdout = "warn"
clippy.print_stderr = "warn"
clippy.implicit_clone = "warn"
clippy.readonly_write_lock = "allow"
rust.unknown_lints = "deny"
# Remove this after https://github.com/PyO3/pyo3/issues/4094
rust.non_local_definitions = "allow"

[workspace.dependencies]
# We turn off default-features for some dependencies here so the workspaces which inherit them can
# selectively turn them on if needed, since we can override default-features = true (from false)
# for the inherited dependency but cannot do the reverse (override from true to false).
#
# See for more details: https://github.com/rust-lang/cargo/issues/11329
ahash = { version = "0.8", features = ["compile-time-rng"] }
aquamarine = "0.3"
arrow = { version = "47.0" }
arrow-array = "47.0"
arrow-flight = "47.0"
arrow-ipc = { version = "47.0", features = ["lz4"] }
arrow-schema = { version = "47.0", features = ["serde"] }
arrow = { version = "51.0.0", features = ["prettyprint"] }
arrow-array = { version = "51.0.0", default-features = false, features = ["chrono-tz"] }
arrow-flight = "51.0"
arrow-ipc = { version = "51.0.0", default-features = false, features = ["lz4"] }
arrow-schema = { version = "51.0", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
axum = { version = "0.6", features = ["headers"] }
@@ -90,21 +98,24 @@ bytemuck = "1.12"
bytes = { version = "1.5", features = ["serde"] }
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.4", features = ["derive"] }
crossbeam-utils = "0.8"
dashmap = "5.4"
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-functions = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
derive_builder = "0.12"
dotenv = "0.15"
etcd-client = "0.12"
# TODO(LFC): Wait for https://github.com/etcdv3/etcd-client/pull/76
etcd-client = { git = "https://github.com/MichaelScofield/etcd-client.git", rev = "4c371e9b3ea8e0a8ee2f9cbd7ded26e54a45df3b" }
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "1bd2398b686e5ac6c1eef6daf615867ce27f75c1" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "73ac0207ab71dfea48f30259ffdb611501b5ecb8" }
humantime = "2.1"
humantime-serde = "1.1"
itertools = "0.10"
@@ -115,12 +126,12 @@ moka = "0.12"
notify = "6.1"
num_cpus = "1.16"
once_cell = "1.18"
opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.git", rev = "33841b38dda79b15f2024952be5f32533325ca02", features = [
opentelemetry-proto = { version = "0.5", features = [
"gen-tonic",
"metrics",
"trace",
] }
parquet = "47.0"
parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0"
pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] }
@@ -133,6 +144,7 @@ reqwest = { version = "0.11", default-features = false, features = [
"json",
"rustls-tls-native-roots",
"stream",
"multipart",
] }
rskafka = "0.5"
rust_decimal = "1.33"
@@ -143,18 +155,18 @@ serde_with = "3"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.7"
sysinfo = "0.30"
# on branch v0.38.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "6a93567ae38d42be5c8d08b13c8ff4dde26502ef", features = [
# on branch v0.44.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "c919990bf62ad38d2b0c0a3bc90b26ad919d51b0", features = [
"visitor",
] }
strum = { version = "0.25", features = ["derive"] }
tempfile = "3"
tokio = { version = "1.28", features = ["full"] }
tokio = { version = "1.36", features = ["full"] }
tokio-stream = { version = "0.1" }
tokio-util = { version = "0.7", features = ["io-util", "compat"] }
toml = "0.8.8"
tonic = { version = "0.10", features = ["tls"] }
uuid = { version = "1", features = ["serde", "v4", "fast-rng"] }
tonic = { version = "0.11", features = ["tls"] }
uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
zstd = "0.13"

## workspaces members

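The `default-features` comment in `[workspace.dependencies]` above is easiest to see with a small sketch. Assuming a hypothetical member crate that inherits `arrow-array` from this workspace, it can opt back into extra features on top of the workspace entry, but it could not drop default features the workspace had already enabled (see rust-lang/cargo#11329):

```toml
# Hypothetical member-crate manifest (illustration only, not part of this diff).
# The workspace pins arrow-array with default-features = false, so a member
# that needs more simply adds features on top of the inherited entry:
[dependencies]
arrow-array = { workspace = true, features = ["chrono-tz"] }

# The reverse does not work: had the workspace used default-features = true,
# a member could not override it back to false.
```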
Makefile (18 changes)
@@ -54,8 +54,10 @@ ifneq ($(strip $(RELEASE)),)
CARGO_BUILD_OPTS += --release
endif

ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), true)
ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), all)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64,linux/arm64 --push
else ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), amd64)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64 --push
else
BUILDX_MULTI_PLATFORM_BUILD_OPTS := -o type=docker
endif
@@ -169,6 +171,10 @@ check: ## Cargo check all the targets.
clippy: ## Check clippy rules.
cargo clippy --workspace --all-targets --all-features -- -D warnings

.PHONY: fix-clippy
fix-clippy: ## Fix clippy violations.
cargo clippy --workspace --all-targets --all-features --fix

.PHONY: fmt-check
fmt-check: ## Check code format.
cargo fmt --all -- --check
@@ -188,6 +194,16 @@ run-it-in-container: start-etcd ## Run integration tests in dev-builder.
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-${BASE_IMAGE}:latest \
make test sqlness-test BUILD_JOBS=${BUILD_JOBS}

##@ Docs
config-docs: ## Generate configuration documentation from toml files.
docker run --rm \
-v ${PWD}:/greptimedb \
-w /greptimedb/config \
toml2docs/toml2docs:latest \
-p '##' \
-t ./config-docs-template.md \
-o ./config.md

##@ General

# The help target prints out all targets with their descriptions organized

@@ -143,7 +143,7 @@ cargo run -- standalone start
- [GreptimeDB C++ Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-cpp)
- [GreptimeDB Erlang Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-erl)
- [GreptimeDB Rust Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-rust)
- [GreptimeDB JavaScript Ingester](https://github.com/GreptimeTeam/greptime-ingester-js)
- [GreptimeDB JavaScript Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-js)

### Grafana Dashboard

@@ -215,37 +215,7 @@ fn build_values(column: &ArrayRef) -> (Values, ColumnDataType) {
ColumnDataType::String,
)
}
DataType::Null
| DataType::Boolean
| DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64
| DataType::Float16
| DataType::Float32
| DataType::Date32
| DataType::Date64
| DataType::Time32(_)
| DataType::Time64(_)
| DataType::Duration(_)
| DataType::Interval(_)
| DataType::Binary
| DataType::FixedSizeBinary(_)
| DataType::LargeBinary
| DataType::LargeUtf8
| DataType::List(_)
| DataType::FixedSizeList(_, _)
| DataType::LargeList(_)
| DataType::Struct(_)
| DataType::Union(_, _)
| DataType::Dictionary(_, _)
| DataType::Decimal128(_, _)
| DataType::Decimal256(_, _)
| DataType::RunEndEncoded(_, _)
| DataType::Map(_, _) => todo!(),
_ => unimplemented!(),
}
}

@@ -444,7 +414,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
fn query_set(table_name: &str) -> HashMap<String, String> {
HashMap::from([
(
"count_all".to_string(),
"count_all".to_string(),
format!("SELECT COUNT(*) FROM {table_name};"),
),
(

cliff.toml (14 changes)
@@ -53,7 +53,7 @@ Release date: {{ timestamp | date(format="%B %d, %Y") }}
## New Contributors
{% endif -%}
{% for contributor in github.contributors | filter(attribute="is_first_time", value=true) %}
* @{{ contributor.username }} made their first contribution
* [@{{ contributor.username }}](https://github.com/{{ contributor.username }}) made their first contribution
{%- if contributor.pr_number %} in \
[#{{ contributor.pr_number }}]({{ self::remote_url() }}/pull/{{ contributor.pr_number }}) \
{%- endif %}
@@ -65,7 +65,17 @@ Release date: {{ timestamp | date(format="%B %d, %Y") }}

We would like to thank the following contributors from the GreptimeDB community:

{{ github.contributors | map(attribute="username") | join(sep=", ") }}
{%- set contributors = github.contributors | sort(attribute="username") | map(attribute="username") -%}
{%- set bots = ['dependabot[bot]'] %}

{% for contributor in contributors %}
{%- if bots is containing(contributor) -%}{% continue %}{%- endif -%}
{%- if loop.first -%}
[@{{ contributor }}](https://github.com/{{ contributor }})
{%- else -%}
, [@{{ contributor }}](https://github.com/{{ contributor }})
{%- endif -%}
{%- endfor %}
{%- endif %}
{% raw %}\n{% endraw %}

config/config-docs-template.md (new file, 19 lines)
@@ -0,0 +1,19 @@
# Configurations

## Standalone Mode

{{ toml2docs "./standalone.example.toml" }}

## Cluster Mode

### Frontend

{{ toml2docs "./frontend.example.toml" }}

### Metasrv

{{ toml2docs "./metasrv.example.toml" }}

### Datanode

{{ toml2docs "./datanode.example.toml" }}
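Judging from the `config-docs` Makefile target above (note the `-p '##'` flag), toml2docs appears to read `##`-prefixed comments from the example TOML files and render them as the description column of `config/config.md`. A hypothetical sketch of what an annotated example file might look like under that assumption (illustrative only, not taken from this diff):

```toml
# Hypothetical excerpt of an annotated example config, assuming toml2docs
# turns `##` comments into the documentation for the key that follows.

## The HTTP server options.
[http]
## The address to bind the HTTP server.
addr = "127.0.0.1:4000"
## HTTP request timeout.
timeout = "30s"
```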
config/config.md (new file, 376 lines)
@@ -0,0 +1,376 @@
# Configurations

## Standalone Mode

| Key | Type | Default | Descriptions |
| --- | -----| ------- | ----------- |
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
| `default_timezone` | String | `None` | The default timezone of the server. |
| `http` | -- | -- | The HTTP server options. |
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `mysql` | -- | -- | MySQL server options. |
| `mysql.enable` | Bool | `true` | Whether to enable. |
| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
| `mysql.tls` | -- | -- | -- |
| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
| `mysql.tls.cert_path` | String | `None` | Certificate file path. |
| `mysql.tls.key_path` | String | `None` | Private key file path. |
| `mysql.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
| `postgres` | -- | -- | PostgresSQL server options. |
| `postgres.enable` | Bool | `true` | Whether to enable |
| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgresSQL server. |
| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
| `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql_options.tls` section. |
| `postgres.tls.mode` | String | `disable` | TLS mode. |
| `postgres.tls.cert_path` | String | `None` | Certificate file path. |
| `postgres.tls.key_path` | String | `None` | Private key file path. |
| `postgres.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
| `opentsdb` | -- | -- | OpenTSDB protocol options. |
| `opentsdb.enable` | Bool | `true` | Whether to enable |
| `opentsdb.addr` | String | `127.0.0.1:4242` | OpenTSDB telnet API server address. |
| `opentsdb.runtime_size` | Integer | `2` | The number of server worker threads. |
| `influxdb` | -- | -- | InfluxDB protocol options. |
| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
| `prom_store` | -- | -- | Prometheus remote storage options |
| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
| `wal` | -- | -- | The WAL options. |
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
| `wal.dir` | String | `None` | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
| `wal.max_batch_size` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
| `wal.linger` | String | `200ms` | The linger duration of a kafka batch producer.<br/>**It's only used when the provider is `kafka`**. |
| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_init` | String | `500ms` | The initial backoff delay.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
| `metadata_store` | -- | -- | Metadata storage options. |
| `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. |
| `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. |
| `procedure` | -- | -- | Procedure storage options. |
| `procedure.max_retry_times` | Integer | `3` | Procedure max retry time. |
| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
| `storage.cache_path` | String | `None` | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
| `storage.cache_capacity` | String | `None` | The local file cache capacity in bytes. |
| `storage.bucket` | String | `None` | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
| `storage.root` | String | `None` | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
| `storage.access_key_id` | String | `None` | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
| `storage.secret_access_key` | String | `None` | The secret access key of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3`**. |
| `storage.access_key_secret` | String | `None` | The secret access key of the aliyun account.<br/>**It's only used when the storage type is `Oss`**. |
| `storage.account_name` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.account_key` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.scope` | String | `None` | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential_path` | String | `None` | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.container` | String | `None` | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.sas_token` | String | `None` | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.endpoint` | String | `None` | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
| `storage.region` | String | `None` | The region of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
| `region_engine.mito` | -- | -- | The Mito engine options. |
| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
| `region_engine.mito.worker_channel_size` | Integer | `128` | Request channel size of each worker. |
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
| `region_engine.mito.global_write_buffer_size` | String | `1GB` | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
| `region_engine.mito.inverted_index` | -- | -- | The options for inverted index in Mito engine. |
| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically<br/>- `disable`: never |
| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically<br/>- `disable`: never |
| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query<br/>- `auto`: automatically<br/>- `disable`: never |
| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `64M` | Memory threshold for performing an external sort during index creation.<br/>Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. |
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). |
| `region_engine.mito.memtable` | -- | -- | -- |
| `region_engine.mito.memtable.type` | String | `time_series` | Memtable type.<br/>- `time_series`: time-series memtable<br/>- `partition_tree`: partition tree memtable (experimental) |
| `region_engine.mito.memtable.index_max_keys_per_shard` | Integer | `8192` | The max number of keys in one shard.<br/>Only available for `partition_tree` memtable. |
| `region_engine.mito.memtable.data_freeze_threshold` | Integer | `32768` | The max rows of data inside the actively writing buffer in one shard.<br/>Only available for `partition_tree` memtable. |
| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes.<br/>Only available for `partition_tree` memtable. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself |
| `export_metrics.self_import.db` | String | `None` | -- |
| `export_metrics.remote_write` | -- | -- | -- |
| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |

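The standalone table above maps directly onto a TOML configuration file. A minimal sketch built only from keys documented above (values are illustrative placeholders, not recommendations):

```toml
# Minimal standalone configuration sketch, using only keys documented above.
mode = "standalone"

[http]
addr = "127.0.0.1:4000"
timeout = "30s"

[wal]
provider = "raft_engine"
dir = "/tmp/greptimedb/wal"   # illustrative path; only used with raft_engine

[storage]
data_home = "/tmp/greptimedb/"
type = "File"

[[region_engine]]
[region_engine.mito]
num_workers = 8
```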
## Cluster Mode

### Frontend

| Key | Type | Default | Descriptions |
| --- | -----| ------- | ----------- |
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
| `default_timezone` | String | `None` | The default timezone of the server. |
| `heartbeat` | -- | -- | The heartbeat options. |
| `heartbeat.interval` | String | `18s` | Interval for sending heartbeat messages to the metasrv. |
| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |
| `http` | -- | -- | The HTTP server options. |
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `mysql` | -- | -- | MySQL server options. |
| `mysql.enable` | Bool | `true` | Whether to enable. |
| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
| `mysql.tls` | -- | -- | -- |
| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
| `mysql.tls.cert_path` | String | `None` | Certificate file path. |
| `mysql.tls.key_path` | String | `None` | Private key file path. |
| `mysql.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
| `postgres` | -- | -- | PostgresSQL server options. |
| `postgres.enable` | Bool | `true` | Whether to enable |
| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgresSQL server. |
| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
| `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql_options.tls` section. |
| `postgres.tls.mode` | String | `disable` | TLS mode. |
| `postgres.tls.cert_path` | String | `None` | Certificate file path. |
| `postgres.tls.key_path` | String | `None` | Private key file path. |
| `postgres.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
| `opentsdb` | -- | -- | OpenTSDB protocol options. |
| `opentsdb.enable` | Bool | `true` | Whether to enable |
| `opentsdb.addr` | String | `127.0.0.1:4242` | OpenTSDB telnet API server address. |
| `opentsdb.runtime_size` | Integer | `2` | The number of server worker threads. |
| `influxdb` | -- | -- | InfluxDB protocol options. |
| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
| `prom_store` | -- | -- | Prometheus remote storage options |
| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
| `meta_client` | -- | -- | The metasrv client options. |
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
| `meta_client.timeout` | String | `3s` | Operation timeout. |
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
| `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. |
| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. |
| `meta_client.metadata_cache_tti` | String | `5m` | -- |
| `datanode` | -- | -- | Datanode options. |
| `datanode.client` | -- | -- | Datanode client options. |
| `datanode.client.timeout` | String | `10s` | -- |
| `datanode.client.connect_timeout` | String | `10s` | -- |
| `datanode.client.tcp_nodelay` | Bool | `true` | -- |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself |
| `export_metrics.self_import.db` | String | `None` | -- |
| `export_metrics.remote_write` | -- | -- | -- |
| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |

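In cluster mode the frontend-specific part of this table is mostly the `heartbeat` and `meta_client` sections; a small sketch using only keys and defaults listed above (the metasrv address is a placeholder):

```toml
# Illustrative frontend snippet for cluster mode; addresses are placeholders.
mode = "distributed"

[heartbeat]
interval = "18s"
retry_interval = "3s"

[meta_client]
metasrv_addrs = ["127.0.0.1:3002"]
timeout = "3s"
ddl_timeout = "10s"
connect_timeout = "1s"
tcp_nodelay = true
```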
### Metasrv

| Key | Type | Default | Descriptions |
| --- | -----| ------- | ----------- |
| `data_home` | String | `/tmp/metasrv/` | The working home directory. |
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
| `store_addr` | String | `127.0.0.1:2379` | Etcd server address. |
| `selector` | String | `lease_based` | Datanode selector type.<br/>- `lease_based` (default value).<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. |
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
| `procedure` | -- | -- | Procedure storage options. |
| `procedure.max_retry_times` | Integer | `12` | Procedure max retry time. |
| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
| `procedure.max_metadata_value_size` | String | `1500KiB` | Auto split large value.<br/>GreptimeDB procedure uses etcd as the default metadata storage backend.<br/>The maximum size of any etcd request is 1.5 MiB.<br/>1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key)<br/>Comment out `max_metadata_value_size` to disable splitting large values (no limit). |
| `failure_detector` | -- | -- | -- |
| `failure_detector.threshold` | Float | `8.0` | -- |
| `failure_detector.min_std_deviation` | String | `100ms` | -- |
| `failure_detector.acceptable_heartbeat_pause` | String | `3000ms` | -- |
| `failure_detector.first_heartbeat_estimate` | String | `1000ms` | -- |
| `datanode` | -- | -- | Datanode options. |
| `datanode.client` | -- | -- | Datanode client options. |
| `datanode.client.timeout` | String | `10s` | -- |
| `datanode.client.connect_timeout` | String | `10s` | -- |
| `datanode.client.tcp_nodelay` | Bool | `true` | -- |
| `wal` | -- | -- | -- |
| `wal.provider` | String | `raft_engine` | -- |
| `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. |
| `wal.num_topics` | Integer | `64` | Number of topics to be created upon start. |
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`. |
| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition. |
| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled. |
| `wal.backoff_init` | String | `500ms` | The initial backoff for kafka clients. |
| `wal.backoff_max` | String | `10s` | The maximum backoff for kafka clients. |
| `wal.backoff_base` | Integer | `2` | Exponential backoff rate, i.e. next backoff = base * current backoff. |
| `wal.backoff_deadline` | String | `5mins` | Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself |
| `export_metrics.self_import.db` | String | `None` | -- |
| `export_metrics.remote_write` | -- | -- | -- |
| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |

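The metasrv `wal.*` keys above only come into play with the Kafka provider; a hedged sketch of how they might be combined, using only keys documented above (the broker address is a placeholder):

```toml
# Illustrative metasrv snippet enabling Kafka remote-WAL topic management.
data_home = "/tmp/metasrv/"
bind_addr = "127.0.0.1:3002"
server_addr = "127.0.0.1:3002"
store_addr = "127.0.0.1:2379"

[wal]
provider = "kafka"
broker_endpoints = ["127.0.0.1:9092"]   # placeholder broker
num_topics = 64
selector_type = "round_robin"
topic_name_prefix = "greptimedb_wal_topic"
replication_factor = 1
```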
### Datanode
|
||||
|
||||
| Key | Type | Default | Descriptions |
|
||||
| --- | -----| ------- | ----------- |
|
||||
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
|
||||
| `node_id` | Integer | `None` | The datanode identifier and should be unique in the cluster. |
|
||||
| `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.<br/>It will block the datanode start if it can't receive leases in the heartbeat from metasrv. |
|
||||
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
|
||||
| `rpc_addr` | String | `127.0.0.1:3001` | The gRPC address of the datanode. |
|
||||
| `rpc_hostname` | String | `None` | The hostname of the datanode. |
|
||||
| `rpc_runtime_size` | Integer | `8` | The number of gRPC server worker threads. |
|
||||
| `rpc_max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
|
||||
| `rpc_max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
|
||||
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
|
||||
| `heartbeat` | -- | -- | The heartbeat options. |
|
||||
| `heartbeat.interval` | String | `3s` | Interval for sending heartbeat messages to the metasrv. |
|
||||
| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |
|
||||
| `meta_client` | -- | -- | The metasrv client options. |
|
||||
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
|
||||
| `meta_client.timeout` | String | `3s` | Operation timeout. |
|
||||
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
|
||||
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
|
||||
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
|
||||
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
|
||||
| `meta_client.metadata_cache_max_capacity` | Integer | `100000` | The configuration about the cache of the metadata. |
|
||||
| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. |
|
||||
| `meta_client.metadata_cache_tti` | String | `5m` | -- |
|
||||
| `wal` | -- | -- | The WAL options. |
|
||||
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the WAL is stored in the local file system by raft-engine.<br/>- `kafka`: remote WAL, the data is stored in Kafka. |
|
||||
| `wal.dir` | String | `None` | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.max_batch_size` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.linger` | String | `200ms` | The linger duration of a kafka batch producer.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_init` | String | `500ms` | The initial backoff delay.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `storage` | -- | -- | The data storage options. |
|
||||
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
|
||||
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
|
||||
| `storage.cache_path` | String | `None` | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
|
||||
| `storage.cache_capacity` | String | `None` | The local file cache capacity in bytes. |
|
||||
| `storage.bucket` | String | `None` | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
|
||||
| `storage.root` | String | `None` | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
|
||||
| `storage.access_key_id` | String | `None` | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
|
||||
| `storage.secret_access_key` | String | `None` | The secret access key of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3`**. |
|
||||
| `storage.access_key_secret` | String | `None` | The secret access key of the aliyun account.<br/>**It's only used when the storage type is `Oss`**. |
|
||||
| `storage.account_name` | String | `None` | The account name of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.account_key` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.scope` | String | `None` | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
|
||||
| `storage.credential_path` | String | `None` | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
|
||||
| `storage.container` | String | `None` | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.sas_token` | String | `None` | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.endpoint` | String | `None` | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `storage.region` | String | `None` | The region of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
|
||||
| `region_engine.mito` | -- | -- | The Mito engine options. |
|
||||
| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
|
||||
| `region_engine.mito.worker_channel_size` | Integer | `128` | Request channel size of each worker. |
|
||||
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
|
||||
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updates to trigger a new checkpoint for the manifest. |
|
||||
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
|
||||
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
|
||||
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
|
||||
| `region_engine.mito.global_write_buffer_size` | String | `1GB` | Global write buffer size for all regions. If not set, it defaults to 1/8 of OS memory with a max limitation of 1GB. |
|
||||
| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it defaults to 2 times `global_write_buffer_size`. |
|
||||
| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Set it to 0 to disable the cache.<br/>If not set, it defaults to 1/32 of OS memory with a max limitation of 128MB. |
|
||||
| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Set it to 0 to disable the cache.<br/>If not set, it defaults to 1/16 of OS memory with a max limitation of 512MB. |
|
||||
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Set it to 0 to disable the cache.<br/>If not set, it defaults to 1/16 of OS memory with a max limitation of 512MB. |
|
||||
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
|
||||
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
|
||||
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
|
||||
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
|
||||
| `region_engine.mito.inverted_index` | -- | -- | The options for inverted index in Mito engine. |
|
||||
| `region_engine.mito.inverted_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically<br/>- `disable`: never |
|
||||
| `region_engine.mito.inverted_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically<br/>- `disable`: never |
|
||||
| `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query<br/>- `auto`: automatically<br/>- `disable`: never |
|
||||
| `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `64M` | Memory threshold for performing an external sort during index creation.<br/>Setting to empty will disable external sorting, forcing all sorting operations to happen in memory. |
|
||||
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`). |
|
||||
| `region_engine.mito.memtable` | -- | -- | -- |
|
||||
| `region_engine.mito.memtable.type` | String | `time_series` | Memtable type.<br/>- `time_series`: time-series memtable<br/>- `partition_tree`: partition tree memtable (experimental) |
|
||||
| `region_engine.mito.memtable.index_max_keys_per_shard` | Integer | `8192` | The max number of keys in one shard.<br/>Only available for `partition_tree` memtable. |
|
||||
| `region_engine.mito.memtable.data_freeze_threshold` | Integer | `32768` | The max rows of data inside the actively writing buffer in one shard.<br/>Only available for `partition_tree` memtable. |
|
||||
| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes.<br/>Only available for `partition_tree` memtable. |
|
||||
| `logging` | -- | -- | The logging options. |
|
||||
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
|
||||
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing that will be sampled and exported.<br/>Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.<br/>Ratios > 1 are treated as 1 and ratios < 0 are treated as 0. |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `export_metrics` | -- | -- | The datanode can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from Prometheus scrape. |
|
||||
| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
|
||||
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
|
||||
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommended to collect metrics generated by itself. |
|
||||
| `export_metrics.self_import.db` | String | `None` | -- |
|
||||
| `export_metrics.remote_write` | -- | -- | -- |
|
||||
| `export_metrics.remote_write.url` | String | `""` | The URL to send the metrics to, for example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`. |
|
||||
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers carried by the Prometheus remote-write requests. |
|
||||
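
For orientation, the sketch below combines a handful of the datanode options documented above into a single minimal TOML file. It is only an illustrative assembly of the keys listed in this table; the values are placeholders, not recommended settings.

```toml
# A minimal datanode configuration sketch; all values are placeholders.
mode = "distributed"
node_id = 42

[meta_client]
metasrv_addrs = ["127.0.0.1:3002"]

[wal]
provider = "raft_engine"
dir = "/tmp/greptimedb/wal"

[storage]
data_home = "/tmp/greptimedb/"
type = "File"

[logging]
dir = "/tmp/greptimedb/logs"
level = "info"
```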
@@ -1,171 +1,430 @@
|
||||
# Node running mode, see `standalone.example.toml`.
|
||||
mode = "distributed"
|
||||
# The datanode identifier, should be unique.
|
||||
## The running mode of the datanode. It can be `standalone` or `distributed`.
|
||||
mode = "standalone"
|
||||
|
||||
## The datanode identifier; it should be unique in the cluster.
|
||||
## +toml2docs:none-default
|
||||
node_id = 42
|
||||
# gRPC server address, "127.0.0.1:3001" by default.
|
||||
rpc_addr = "127.0.0.1:3001"
|
||||
# Hostname of this node.
|
||||
rpc_hostname = "127.0.0.1"
|
||||
# The number of gRPC server worker threads, 8 by default.
|
||||
rpc_runtime_size = 8
|
||||
# Start services after regions have obtained leases.
|
||||
# It will block the datanode start if it can't receive leases in the heartbeat from metasrv.
|
||||
|
||||
## Start services after regions have obtained leases.
|
||||
## It will block the datanode start if it can't receive leases in the heartbeat from metasrv.
|
||||
require_lease_before_startup = false
|
||||
|
||||
# Initialize all regions in the background during the startup.
|
||||
# By default, it provides services after all regions have been initialized.
|
||||
## Initialize all regions in the background during the startup.
|
||||
## By default, it provides services after all regions have been initialized.
|
||||
init_regions_in_background = false
|
||||
|
||||
## The gRPC address of the datanode.
|
||||
rpc_addr = "127.0.0.1:3001"
|
||||
|
||||
## The hostname of the datanode.
|
||||
## +toml2docs:none-default
|
||||
rpc_hostname = "127.0.0.1"
|
||||
|
||||
## The number of gRPC server worker threads.
|
||||
rpc_runtime_size = 8
|
||||
|
||||
## The maximum receive message size for gRPC server.
|
||||
rpc_max_recv_message_size = "512MB"
|
||||
|
||||
## The maximum send message size for gRPC server.
|
||||
rpc_max_send_message_size = "512MB"
|
||||
|
||||
## Enable telemetry to collect anonymous usage data.
|
||||
enable_telemetry = true
|
||||
|
||||
## The heartbeat options.
|
||||
[heartbeat]
|
||||
# Interval for sending heartbeat messages to the Metasrv, 3 seconds by default.
|
||||
## Interval for sending heartbeat messages to the metasrv.
|
||||
interval = "3s"
|
||||
|
||||
# Metasrv client options.
|
||||
## Interval for retrying to send heartbeat messages to the metasrv.
|
||||
retry_interval = "3s"
|
||||
|
||||
## The metasrv client options.
|
||||
[meta_client]
|
||||
# Metasrv address list.
|
||||
## The addresses of the metasrv.
|
||||
metasrv_addrs = ["127.0.0.1:3002"]
|
||||
# Heartbeat timeout, 500 milliseconds by default.
|
||||
heartbeat_timeout = "500ms"
|
||||
# Operation timeout, 3 seconds by default.
|
||||
|
||||
## Operation timeout.
|
||||
timeout = "3s"
|
||||
# Connect server timeout, 1 second by default.
|
||||
|
||||
## Heartbeat timeout.
|
||||
heartbeat_timeout = "500ms"
|
||||
|
||||
## DDL timeout.
|
||||
ddl_timeout = "10s"
|
||||
|
||||
## Connect server timeout.
|
||||
connect_timeout = "1s"
|
||||
# `TCP_NODELAY` option for accepted connections, true by default.
|
||||
|
||||
## `TCP_NODELAY` option for accepted connections.
|
||||
tcp_nodelay = true
|
||||
|
||||
# WAL options.
|
||||
## The configuration about the cache of the metadata.
|
||||
metadata_cache_max_capacity = 100000
|
||||
|
||||
## TTL of the metadata cache.
|
||||
metadata_cache_ttl = "10m"
|
||||
|
||||
# TTI of the metadata cache.
|
||||
metadata_cache_tti = "5m"
|
||||
|
||||
## The WAL options.
|
||||
[wal]
|
||||
## The provider of the WAL.
|
||||
## - `raft_engine`: the wal is stored in the local file system by raft-engine.
|
||||
## - `kafka`: remote WAL, the data is stored in Kafka.
|
||||
provider = "raft_engine"
|
||||
|
||||
# Raft-engine wal options, see `standalone.example.toml`.
|
||||
# dir = "/tmp/greptimedb/wal"
|
||||
## The directory to store the WAL files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
## +toml2docs:none-default
|
||||
dir = "/tmp/greptimedb/wal"
|
||||
|
||||
## The size of the WAL segment file.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
file_size = "256MB"
|
||||
|
||||
## The threshold of the WAL size to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_threshold = "4GB"
|
||||
|
||||
## The interval to trigger a flush.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_interval = "10m"
|
||||
|
||||
## The read batch size.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
read_batch_size = 128
|
||||
|
||||
## Whether to use sync write.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_write = false
|
||||
|
||||
# Kafka wal options, see `standalone.example.toml`.
|
||||
# broker_endpoints = ["127.0.0.1:9092"]
|
||||
# Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
# max_batch_size = "1MB"
|
||||
# linger = "200ms"
|
||||
# consumer_wait_timeout = "100ms"
|
||||
# backoff_init = "500ms"
|
||||
# backoff_max = "10s"
|
||||
# backoff_base = 2
|
||||
# backoff_deadline = "5mins"
|
||||
## Whether to reuse logically truncated log files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
enable_log_recycle = true
|
||||
|
||||
# Storage options, see `standalone.example.toml`.
|
||||
## Whether to pre-create log files on start up.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
prefill_log_files = false
|
||||
|
||||
## Duration for fsyncing log files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_period = "10s"
|
||||
|
||||
## The Kafka broker endpoints.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
## The max size of a single producer batch.
|
||||
## Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
max_batch_size = "1MB"
|
||||
|
||||
## The linger duration of a kafka batch producer.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
linger = "200ms"
|
||||
|
||||
## The consumer wait timeout.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
consumer_wait_timeout = "100ms"
|
||||
|
||||
## The initial backoff delay.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_init = "500ms"
|
||||
|
||||
## The maximum backoff delay.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_max = "10s"
|
||||
|
||||
## The exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_base = 2
|
||||
|
||||
## The deadline of retries.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_deadline = "5mins"
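## As an illustration (not part of the shipped example), with the settings above the
## retry delay grows as `next backoff = backoff_base * current backoff`, is capped by
## `backoff_max`, and retries stop once the total wait reaches `backoff_deadline`:
## 500ms -> 1s -> 2s -> 4s -> 8s -> 10s -> 10s -> ... (until roughly 5 minutes have elapsed).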
|
||||
|
||||
# Example of using S3 as the storage.
|
||||
# [storage]
|
||||
# type = "S3"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# access_key_id = "test"
|
||||
# secret_access_key = "123456"
|
||||
# endpoint = "https://s3.amazonaws.com"
|
||||
# region = "us-west-2"
|
||||
|
||||
# Example of using Oss as the storage.
|
||||
# [storage]
|
||||
# type = "Oss"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# access_key_id = "test"
|
||||
# access_key_secret = "123456"
|
||||
# endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
|
||||
|
||||
# Example of using Azblob as the storage.
|
||||
# [storage]
|
||||
# type = "Azblob"
|
||||
# container = "greptimedb"
|
||||
# root = "data"
|
||||
# account_name = "test"
|
||||
# account_key = "123456"
|
||||
# endpoint = "https://greptimedb.blob.core.windows.net"
|
||||
# sas_token = ""
|
||||
|
||||
# Example of using Gcs as the storage.
|
||||
# [storage]
|
||||
# type = "Gcs"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# scope = "test"
|
||||
# credential_path = "123456"
|
||||
# endpoint = "https://storage.googleapis.com"
|
||||
|
||||
## The data storage options.
|
||||
[storage]
|
||||
# The working home directory.
|
||||
## The working home directory.
|
||||
data_home = "/tmp/greptimedb/"
|
||||
# Storage type.
|
||||
type = "File"
|
||||
# TTL for all tables. Disabled by default.
|
||||
# global_ttl = "7d"
|
||||
|
||||
# Cache configuration for object storage such as 'S3' etc.
|
||||
# The local file cache directory
|
||||
# cache_path = "/path/local_cache"
|
||||
# The local file cache capacity in bytes.
|
||||
# cache_capacity = "256MB"
|
||||
## The storage type used to store the data.
|
||||
## - `File`: the data is stored in the local file system.
|
||||
## - `S3`: the data is stored in the S3 object storage.
|
||||
## - `Gcs`: the data is stored in the Google Cloud Storage.
|
||||
## - `Azblob`: the data is stored in the Azure Blob Storage.
|
||||
## - `Oss`: the data is stored in the Aliyun OSS.
|
||||
type = "File"
|
||||
|
||||
## Cache configuration for object storage such as 'S3' etc.
|
||||
## The local file cache directory.
|
||||
## +toml2docs:none-default
|
||||
cache_path = "/path/local_cache"
|
||||
|
||||
## The local file cache capacity in bytes.
|
||||
## +toml2docs:none-default
|
||||
cache_capacity = "256MB"
|
||||
|
||||
## The S3 bucket name.
|
||||
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
bucket = "greptimedb"
|
||||
|
||||
## The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
|
||||
## **It's only used when the storage type is `S3`, `Oss` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
root = "greptimedb"
|
||||
|
||||
## The access key id of the aws account.
|
||||
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
|
||||
## **It's only used when the storage type is `S3` and `Oss`**.
|
||||
## +toml2docs:none-default
|
||||
access_key_id = "test"
|
||||
|
||||
## The secret access key of the aws account.
|
||||
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
|
||||
## **It's only used when the storage type is `S3`**.
|
||||
## +toml2docs:none-default
|
||||
secret_access_key = "test"
|
||||
|
||||
## The secret access key of the aliyun account.
|
||||
## **It's only used when the storage type is `Oss`**.
|
||||
## +toml2docs:none-default
|
||||
access_key_secret = "test"
|
||||
|
||||
## The account name of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
account_name = "test"
|
||||
|
||||
## The account key of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
account_key = "test"
|
||||
|
||||
## The scope of the google cloud storage.
|
||||
## **It's only used when the storage type is `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
scope = "test"
|
||||
|
||||
## The credential path of the google cloud storage.
|
||||
## **It's only used when the storage type is `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
credential_path = "test"
|
||||
|
||||
## The container of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
container = "greptimedb"
|
||||
|
||||
## The sas token of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
sas_token = ""
|
||||
|
||||
## The endpoint of the S3 service.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
endpoint = "https://s3.amazonaws.com"
|
||||
|
||||
## The region of the S3 service.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
region = "us-west-2"
|
||||
|
||||
# Custom storage options
|
||||
#[[storage.providers]]
|
||||
#type = "S3"
|
||||
#[[storage.providers]]
|
||||
#type = "Gcs"
|
||||
# [[storage.providers]]
|
||||
# type = "S3"
|
||||
# [[storage.providers]]
|
||||
# type = "Gcs"
|
||||
|
||||
# Mito engine options
|
||||
## The region engine options. You can configure multiple region engines.
|
||||
[[region_engine]]
|
||||
|
||||
## The Mito engine options.
|
||||
[region_engine.mito]
|
||||
# Number of region workers
|
||||
|
||||
## Number of region workers.
|
||||
num_workers = 8
|
||||
# Request channel size of each worker
|
||||
|
||||
## Request channel size of each worker.
|
||||
worker_channel_size = 128
|
||||
# Max batch size for a worker to handle requests
|
||||
|
||||
## Max batch size for a worker to handle requests.
|
||||
worker_request_batch_size = 64
|
||||
# Number of meta action updated to trigger a new checkpoint for the manifest
|
||||
|
||||
## Number of meta action updates to trigger a new checkpoint for the manifest.
|
||||
manifest_checkpoint_distance = 10
|
||||
# Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
|
||||
## Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
compress_manifest = false
|
||||
# Max number of running background jobs
|
||||
|
||||
## Max number of running background jobs
|
||||
max_background_jobs = 4
|
||||
# Interval to auto flush a region if it has not flushed yet.
|
||||
|
||||
## Interval to auto flush a region if it has not flushed yet.
|
||||
auto_flush_interval = "1h"
|
||||
# Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
|
||||
|
||||
## Global write buffer size for all regions. If not set, it defaults to 1/8 of OS memory with a max limitation of 1GB.
|
||||
global_write_buffer_size = "1GB"
|
||||
# Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
|
||||
|
||||
## Global write buffer size threshold to reject write requests. If not set, it defaults to 2 times `global_write_buffer_size`.
|
||||
global_write_buffer_reject_size = "2GB"
|
||||
# Cache size for SST metadata. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
|
||||
|
||||
## Cache size for SST metadata. Set it to 0 to disable the cache.
|
||||
## If not set, it defaults to 1/32 of OS memory with a max limitation of 128MB.
|
||||
sst_meta_cache_size = "128MB"
|
||||
# Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
|
||||
## Cache size for vectors and arrow arrays. Set it to 0 to disable the cache.
|
||||
## If not set, it defaults to 1/16 of OS memory with a max limitation of 512MB.
|
||||
vector_cache_size = "512MB"
|
||||
# Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
|
||||
## Cache size for pages of SST row groups. Set it to 0 to disable the cache.
|
||||
## If not set, it defaults to 1/16 of OS memory with a max limitation of 512MB.
|
||||
page_cache_size = "512MB"
|
||||
# Buffer size for SST writing.
|
||||
|
||||
## Buffer size for SST writing.
|
||||
sst_write_buffer_size = "8MB"
|
||||
# Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
# - 0: using the default value (1/4 of cpu cores).
|
||||
# - 1: scan in current thread.
|
||||
# - n: scan in parallelism n.
|
||||
|
||||
## Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
## - `0`: using the default value (1/4 of cpu cores).
|
||||
## - `1`: scan in current thread.
|
||||
## - `n`: scan in parallelism n.
|
||||
scan_parallelism = 0
|
||||
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
|
||||
|
||||
## Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
parallel_scan_channel_size = 32
|
||||
# Whether to allow stale WAL entries read during replay.
|
||||
|
||||
## Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
|
||||
## The options for inverted index in Mito engine.
|
||||
[region_engine.mito.inverted_index]
|
||||
# Whether to create the index on flush.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to create the index on flush.
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
create_on_flush = "auto"
|
||||
# Whether to create the index on compaction.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to create the index on compaction.
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
create_on_compaction = "auto"
|
||||
# Whether to apply the index on query
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to apply the index on query
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
apply_on_query = "auto"
|
||||
# Memory threshold for performing an external sort during index creation.
|
||||
# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
|
||||
## Memory threshold for performing an external sort during index creation.
|
||||
## Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
mem_threshold_on_create = "64M"
|
||||
# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
|
||||
## File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
intermediate_path = ""
|
||||
|
||||
[region_engine.mito.memtable]
|
||||
# Memtable type.
|
||||
# - "partition_tree": partition tree memtable
|
||||
# - "time_series": time-series memtable (deprecated)
|
||||
type = "partition_tree"
|
||||
# The max number of keys in one shard.
|
||||
## Memtable type.
|
||||
## - `time_series`: time-series memtable
|
||||
## - `partition_tree`: partition tree memtable (experimental)
|
||||
type = "time_series"
|
||||
|
||||
## The max number of keys in one shard.
|
||||
## Only available for `partition_tree` memtable.
|
||||
index_max_keys_per_shard = 8192
|
||||
# The max rows of data inside the actively writing buffer in one shard.
|
||||
|
||||
## The max rows of data inside the actively writing buffer in one shard.
|
||||
## Only available for `partition_tree` memtable.
|
||||
data_freeze_threshold = 32768
|
||||
# Max dictionary bytes.
|
||||
|
||||
## Max dictionary bytes.
|
||||
## Only available for `partition_tree` memtable.
|
||||
fork_dictionary_bytes = "1GiB"
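## An alternative (commented-out) sketch switching to the experimental partition tree
## memtable; the three shard options above only take effect with this memtable type.
# [region_engine.mito.memtable]
# type = "partition_tree"
# index_max_keys_per_shard = 8192
# data_freeze_threshold = 32768
# fork_dictionary_bytes = "1GiB"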
|
||||
|
||||
# Log options, see `standalone.example.toml`
|
||||
# [logging]
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# level = "info"
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files.
|
||||
dir = "/tmp/greptimedb/logs"
|
||||
|
||||
# Datanode export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# [export_metrics.remote_write]
|
||||
# The url the metrics send to. The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
|
||||
# url = ""
|
||||
# HTTP headers of Prometheus remote-write carry
|
||||
# headers = {}
|
||||
## The log level. Can be `info`/`debug`/`warn`/`error`.
|
||||
## +toml2docs:none-default
|
||||
level = "info"
|
||||
|
||||
## Enable OTLP tracing.
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
## The percentage of tracing that will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## Ratios > 1 are treated as 1 and ratios < 0 are treated as 0.
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
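## For example (commented out), to sample roughly 10% of traces instead of all of them:
# [logging.tracing_sample_ratio]
# default_ratio = 0.1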
|
||||
|
||||
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
## Whether to enable exporting metrics.
|
||||
enable = false
|
||||
|
||||
## The interval of export metrics.
|
||||
write_interval = "30s"
|
||||
|
||||
## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself.
|
||||
[export_metrics.self_import]
|
||||
## +toml2docs:none-default
|
||||
db = "information_schema"
|
||||
|
||||
[export_metrics.remote_write]
|
||||
## The URL to send the metrics to, for example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
|
||||
url = ""
|
||||
|
||||
## HTTP headers carried by the Prometheus remote-write requests.
|
||||
headers = { }
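## A hypothetical (commented-out) example of attaching headers to the remote-write
## requests; the header name and value below are placeholders, not defaults.
# headers = { Authorization = "Basic base64-encoded-credentials" }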
|
||||
|
||||
@@ -1,106 +1,192 @@
|
||||
# Node running mode, see `standalone.example.toml`.
|
||||
mode = "distributed"
|
||||
# The default timezone of the server
|
||||
# default_timezone = "UTC"
|
||||
## The running mode of the datanode. It can be `standalone` or `distributed`.
|
||||
mode = "standalone"
|
||||
|
||||
## The default timezone of the server.
|
||||
## +toml2docs:none-default
|
||||
default_timezone = "UTC"
|
||||
|
||||
## The heartbeat options.
|
||||
[heartbeat]
|
||||
# Interval for sending heartbeat task to the Metasrv, 5 seconds by default.
|
||||
interval = "5s"
|
||||
# Interval for retry sending heartbeat task, 5 seconds by default.
|
||||
retry_interval = "5s"
|
||||
## Interval for sending heartbeat messages to the metasrv.
|
||||
interval = "18s"
|
||||
|
||||
# HTTP server options, see `standalone.example.toml`.
|
||||
## Interval for retrying to send heartbeat messages to the metasrv.
|
||||
retry_interval = "3s"
|
||||
|
||||
## The HTTP server options.
|
||||
[http]
|
||||
## The address to bind the HTTP server.
|
||||
addr = "127.0.0.1:4000"
|
||||
## HTTP request timeout.
|
||||
timeout = "30s"
|
||||
## HTTP request body limit.
|
||||
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
|
||||
body_limit = "64MB"
|
||||
|
||||
# gRPC server options, see `standalone.example.toml`.
|
||||
## The gRPC server options.
|
||||
[grpc]
|
||||
## The address to bind the gRPC server.
|
||||
addr = "127.0.0.1:4001"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 8
|
||||
|
||||
# MySQL server options, see `standalone.example.toml`.
|
||||
## MySQL server options.
|
||||
[mysql]
|
||||
## Whether to enable.
|
||||
enable = true
|
||||
## The address to bind the MySQL server.
|
||||
addr = "127.0.0.1:4002"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# MySQL server TLS options, see `standalone.example.toml`.
|
||||
# MySQL server TLS options.
|
||||
[mysql.tls]
|
||||
|
||||
## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
|
||||
## - `disable` (default value)
|
||||
## - `prefer`
|
||||
## - `require`
|
||||
## - `verify-ca`
|
||||
## - `verify-full`
|
||||
mode = "disable"
|
||||
|
||||
## Certificate file path.
|
||||
## +toml2docs:none-default
|
||||
cert_path = ""
|
||||
|
||||
## Private key file path.
|
||||
## +toml2docs:none-default
|
||||
key_path = ""
|
||||
|
||||
## Watch for certificate and key file changes and reload them automatically.
|
||||
watch = false
|
||||
|
||||
# PostgresSQL server options, see `standalone.example.toml`.
|
||||
## PostgreSQL server options.
|
||||
[postgres]
|
||||
## Whether to enable
|
||||
enable = true
|
||||
## The address to bind the PostgreSQL server.
|
||||
addr = "127.0.0.1:4003"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# PostgresSQL server TLS options, see `standalone.example.toml`.
|
||||
## PostgreSQL server TLS options, see the `mysql.tls` section.
|
||||
[postgres.tls]
|
||||
## TLS mode.
|
||||
mode = "disable"
|
||||
|
||||
## Certificate file path.
|
||||
## +toml2docs:none-default
|
||||
cert_path = ""
|
||||
|
||||
## Private key file path.
|
||||
## +toml2docs:none-default
|
||||
key_path = ""
|
||||
|
||||
## Watch for certificate and key file changes and reload them automatically.
|
||||
watch = false
|
||||
|
||||
# OpenTSDB protocol options, see `standalone.example.toml`.
|
||||
## OpenTSDB protocol options.
|
||||
[opentsdb]
|
||||
## Whether to enable
|
||||
enable = true
|
||||
## OpenTSDB telnet API server address.
|
||||
addr = "127.0.0.1:4242"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# InfluxDB protocol options, see `standalone.example.toml`.
|
||||
## InfluxDB protocol options.
|
||||
[influxdb]
|
||||
## Whether to enable InfluxDB protocol in HTTP API.
|
||||
enable = true
|
||||
|
||||
# Prometheus remote storage options, see `standalone.example.toml`.
|
||||
## Prometheus remote storage options
|
||||
[prom_store]
|
||||
## Whether to enable Prometheus remote write and read in HTTP API.
|
||||
enable = true
|
||||
# Whether to store the data from Prometheus remote write in metric engine.
|
||||
# true by default
|
||||
## Whether to store the data from Prometheus remote write in metric engine.
|
||||
with_metric_engine = true
|
||||
|
||||
# Metasrv client options, see `datanode.example.toml`.
|
||||
## The metasrv client options.
|
||||
[meta_client]
|
||||
## The addresses of the metasrv.
|
||||
metasrv_addrs = ["127.0.0.1:3002"]
|
||||
|
||||
## Operation timeout.
|
||||
timeout = "3s"
|
||||
# DDL timeouts options.
|
||||
|
||||
## Heartbeat timeout.
|
||||
heartbeat_timeout = "500ms"
|
||||
|
||||
## DDL timeout.
|
||||
ddl_timeout = "10s"
|
||||
|
||||
## Connect server timeout.
|
||||
connect_timeout = "1s"
|
||||
|
||||
## `TCP_NODELAY` option for accepted connections.
|
||||
tcp_nodelay = true
|
||||
# The configuration about the cache of the Metadata.
|
||||
# default: 100000
|
||||
|
||||
## The configuration about the cache of the metadata.
|
||||
metadata_cache_max_capacity = 100000
|
||||
# default: 10m
|
||||
|
||||
## TTL of the metadata cache.
|
||||
metadata_cache_ttl = "10m"
|
||||
# default: 5m
|
||||
|
||||
# TTI of the metadata cache.
|
||||
metadata_cache_tti = "5m"
|
||||
|
||||
# Log options, see `standalone.example.toml`
|
||||
# [logging]
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# level = "info"
|
||||
|
||||
# Datanode options.
|
||||
## Datanode options.
|
||||
[datanode]
|
||||
# Datanode client options.
|
||||
## Datanode client options.
|
||||
[datanode.client]
|
||||
timeout = "10s"
|
||||
connect_timeout = "10s"
|
||||
tcp_nodelay = true
|
||||
|
||||
# Frontend export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# for `frontend`, `self_import` is recommend to collect metrics generated by itself
|
||||
# [export_metrics.self_import]
|
||||
# db = "information_schema"
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files.
|
||||
dir = "/tmp/greptimedb/logs"
|
||||
|
||||
## The log level. Can be `info`/`debug`/`warn`/`error`.
|
||||
## +toml2docs:none-default
|
||||
level = "info"
|
||||
|
||||
## Enable OTLP tracing.
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
## The percentage of tracing that will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## Ratios > 1 are treated as 1 and ratios < 0 are treated as 0.
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
## Whether to enable exporting metrics.
|
||||
enable = false
|
||||
|
||||
## The interval of export metrics.
|
||||
write_interval = "30s"
|
||||
|
||||
## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself.
|
||||
[export_metrics.self_import]
|
||||
## +toml2docs:none-default
|
||||
db = "information_schema"
|
||||
|
||||
[export_metrics.remote_write]
|
||||
## The URL to send the metrics to, for example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
|
||||
url = ""
|
||||
|
||||
## HTTP headers carried by the Prometheus remote-write requests.
|
||||
headers = { }
|
||||
|
||||
@@ -1,39 +1,44 @@
|
||||
# The working home directory.
|
||||
## The working home directory.
|
||||
data_home = "/tmp/metasrv/"
|
||||
# The bind address of metasrv, "127.0.0.1:3002" by default.
|
||||
|
||||
## The bind address of metasrv.
|
||||
bind_addr = "127.0.0.1:3002"
|
||||
# The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
|
||||
|
||||
## The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
|
||||
server_addr = "127.0.0.1:3002"
|
||||
# Etcd server address, "127.0.0.1:2379" by default.
|
||||
|
||||
## Etcd server address.
|
||||
store_addr = "127.0.0.1:2379"
|
||||
# Datanode selector type.
|
||||
# - "lease_based" (default value).
|
||||
# - "load_based"
|
||||
# For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
|
||||
|
||||
## Datanode selector type.
|
||||
## - `lease_based` (default value).
|
||||
## - `load_based`
|
||||
## For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
|
||||
selector = "lease_based"
|
||||
# Store data in memory, false by default.
|
||||
|
||||
## Store data in memory.
|
||||
use_memory_store = false
|
||||
# Whether to enable greptimedb telemetry, true by default.
|
||||
|
||||
## Whether to enable greptimedb telemetry.
|
||||
enable_telemetry = true
|
||||
# If it's not empty, the metasrv will store all data with this key prefix.
|
||||
|
||||
## If it's not empty, the metasrv will store all data with this key prefix.
|
||||
store_key_prefix = ""
|
||||
|
||||
# Log options, see `standalone.example.toml`
|
||||
# [logging]
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# level = "info"
|
||||
|
||||
# Procedure storage options.
|
||||
## Procedure storage options.
|
||||
[procedure]
|
||||
# Procedure max retry time.
|
||||
|
||||
## Procedure max retry time.
|
||||
max_retry_times = 12
|
||||
# Initial retry delay of procedures, increases exponentially
|
||||
|
||||
## Initial retry delay of procedures, increases exponentially
|
||||
retry_delay = "500ms"
|
||||
# Auto split large value
|
||||
# GreptimeDB procedure uses etcd as the default metadata storage backend.
|
||||
# The etcd the maximum size of any request is 1.5 MiB
|
||||
# 1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key)
|
||||
# Comments out the `max_metadata_value_size`, for don't split large value (no limit).
|
||||
|
||||
## Auto split large value
|
||||
## GreptimeDB procedure uses etcd as the default metadata storage backend.
|
||||
## The maximum size of any etcd request is 1.5 MiB.
|
||||
## 1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key)
|
||||
## Comment out `max_metadata_value_size` to disable splitting large values (no limit).
|
||||
max_metadata_value_size = "1500KiB"
|
||||
|
||||
# Failure detectors options.
|
||||
@@ -43,57 +48,96 @@ min_std_deviation = "100ms"
|
||||
acceptable_heartbeat_pause = "3000ms"
|
||||
first_heartbeat_estimate = "1000ms"
|
||||
|
||||
# # Datanode options.
|
||||
# [datanode]
|
||||
# # Datanode client options.
|
||||
# [datanode.client_options]
|
||||
# timeout = "10s"
|
||||
# connect_timeout = "10s"
|
||||
# tcp_nodelay = true
|
||||
## Datanode options.
|
||||
[datanode]
|
||||
## Datanode client options.
|
||||
[datanode.client]
|
||||
timeout = "10s"
|
||||
connect_timeout = "10s"
|
||||
tcp_nodelay = true
|
||||
|
||||
[wal]
|
||||
# Available wal providers:
|
||||
# - "raft_engine" (default)
|
||||
# - "kafka"
|
||||
# - `raft_engine` (default): there is no raft-engine WAL config here since metasrv currently only participates in remote WAL.
|
||||
# - `kafka`: metasrv **has to be** configured with the Kafka WAL config when the datanode uses the Kafka WAL provider.
|
||||
provider = "raft_engine"
|
||||
|
||||
# There're none raft-engine wal config since meta srv only involves in remote wal currently.
|
||||
|
||||
# Kafka wal config.
|
||||
# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default.
|
||||
# broker_endpoints = ["127.0.0.1:9092"]
|
||||
# Number of topics to be created upon start.
|
||||
# num_topics = 64
|
||||
# Topic selector type.
|
||||
# Available selector types:
|
||||
# - "round_robin" (default)
|
||||
# selector_type = "round_robin"
|
||||
# A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
|
||||
# topic_name_prefix = "greptimedb_wal_topic"
|
||||
# Expected number of replicas of each partition.
|
||||
# replication_factor = 1
|
||||
# Above which a topic creation operation will be cancelled.
|
||||
# create_topic_timeout = "30s"
|
||||
# The initial backoff for kafka clients.
|
||||
# backoff_init = "500ms"
|
||||
# The maximum backoff for kafka clients.
|
||||
# backoff_max = "10s"
|
||||
# Exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
# backoff_base = 2
|
||||
# Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
|
||||
# backoff_deadline = "5mins"
|
||||
|
||||
# Metasrv export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# [export_metrics.remote_write]
|
||||
# The url the metrics send to. The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
|
||||
# url = ""
|
||||
# HTTP headers of Prometheus remote-write carry
|
||||
# headers = {}
|
||||
## The broker endpoints of the Kafka cluster.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
## Number of topics to be created upon start.
|
||||
num_topics = 64
|
||||
|
||||
## Topic selector type.
|
||||
## Available selector types:
|
||||
## - `round_robin` (default)
|
||||
selector_type = "round_robin"
|
||||
|
||||
## A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
|
||||
topic_name_prefix = "greptimedb_wal_topic"
|
||||
|
||||
## Expected number of replicas of each partition.
|
||||
replication_factor = 1
|
||||
|
||||
## The timeout above which a topic creation operation will be cancelled.
|
||||
create_topic_timeout = "30s"
|
||||
## The initial backoff for kafka clients.
|
||||
backoff_init = "500ms"
|
||||
|
||||
## The maximum backoff for kafka clients.
|
||||
backoff_max = "10s"
|
||||
|
||||
## Exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
backoff_base = 2
|
||||
|
||||
## Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
|
||||
backoff_deadline = "5mins"
|
||||
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files.
|
||||
dir = "/tmp/greptimedb/logs"
|
||||
|
||||
## The log level. Can be `info`/`debug`/`warn`/`error`.
|
||||
## +toml2docs:none-default
|
||||
level = "info"
|
||||
|
||||
## Enable OTLP tracing.
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
## The percentage of tracing that will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## Ratios > 1 are treated as 1 and ratios < 0 are treated as 0.
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
## Whether to enable exporting metrics.
|
||||
enable = false
|
||||
|
||||
## The interval of export metrics.
|
||||
write_interval = "30s"
|
||||
|
||||
## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself.
|
||||
[export_metrics.self_import]
|
||||
## +toml2docs:none-default
|
||||
db = "information_schema"
|
||||
|
||||
[export_metrics.remote_write]
|
||||
## The URL to send the metrics to, for example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
|
||||
url = ""
|
||||
|
||||
## HTTP headers carried by the Prometheus remote-write requests.
|
||||
headers = { }
|
||||
|
||||
@@ -1,286 +1,477 @@
|
||||
# Node running mode, "standalone" or "distributed".
|
||||
## The running mode of the datanode. It can be `standalone` or `distributed`.
|
||||
mode = "standalone"
|
||||
# Whether to enable greptimedb telemetry, true by default.
|
||||
enable_telemetry = true
|
||||
# The default timezone of the server
|
||||
# default_timezone = "UTC"
|
||||
|
||||
# HTTP server options.
|
||||
## Enable telemetry to collect anonymous usage data.
|
||||
enable_telemetry = true
|
||||
|
||||
## The default timezone of the server.
|
||||
## +toml2docs:none-default
|
||||
default_timezone = "UTC"
|
||||
|
||||
## The HTTP server options.
|
||||
[http]
|
||||
# Server address, "127.0.0.1:4000" by default.
|
||||
## The address to bind the HTTP server.
|
||||
addr = "127.0.0.1:4000"
|
||||
# HTTP request timeout, 30s by default.
|
||||
## HTTP request timeout.
|
||||
timeout = "30s"
|
||||
# HTTP request body limit, 64Mb by default.
|
||||
# the following units are supported: B, KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, PiB
|
||||
## HTTP request body limit.
|
||||
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
|
||||
body_limit = "64MB"
|
||||
|
||||
# gRPC server options.
|
||||
## The gRPC server options.
|
||||
[grpc]
|
||||
# Server address, "127.0.0.1:4001" by default.
|
||||
## The address to bind the gRPC server.
|
||||
addr = "127.0.0.1:4001"
|
||||
# The number of server worker threads, 8 by default.
|
||||
## The number of server worker threads.
|
||||
runtime_size = 8
|
||||
|
||||
# MySQL server options.
|
||||
## MySQL server options.
|
||||
[mysql]
|
||||
# Whether to enable
|
||||
## Whether to enable.
|
||||
enable = true
|
||||
# Server address, "127.0.0.1:4002" by default.
|
||||
## The address to bind the MySQL server.
|
||||
addr = "127.0.0.1:4002"
|
||||
# The number of server worker threads, 2 by default.
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# MySQL server TLS options.
|
||||
[mysql.tls]
|
||||
# TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
|
||||
# - "disable" (default value)
|
||||
# - "prefer"
|
||||
# - "require"
|
||||
# - "verify-ca"
|
||||
# - "verify-full"
|
||||
|
||||
## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
|
||||
## - `disable` (default value)
|
||||
## - `prefer`
|
||||
## - `require`
|
||||
## - `verify-ca`
|
||||
## - `verify-full`
|
||||
mode = "disable"
|
||||
# Certificate file path.
|
||||
|
||||
## Certificate file path.
|
||||
## +toml2docs:none-default
|
||||
cert_path = ""
|
||||
# Private key file path.
|
||||
|
||||
## Private key file path.
|
||||
## +toml2docs:none-default
|
||||
key_path = ""
|
||||
# Watch for Certificate and key file change and auto reload
|
||||
|
||||
## Watch for certificate and key file changes and reload them automatically.
|
||||
watch = false
|
||||
|
||||
# PostgresSQL server options.
|
||||
## PostgreSQL server options.
|
||||
[postgres]
|
||||
# Whether to enable
|
||||
## Whether to enable
|
||||
enable = true
|
||||
# Server address, "127.0.0.1:4003" by default.
|
||||
## The address to bind the PostgreSQL server.
|
||||
addr = "127.0.0.1:4003"
|
||||
# The number of server worker threads, 2 by default.
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# PostgresSQL server TLS options, see `[mysql_options.tls]` section.
|
||||
## PostgreSQL server TLS options, see the `mysql.tls` section.
|
||||
[postgres.tls]
|
||||
# TLS mode.
|
||||
## TLS mode.
|
||||
mode = "disable"
|
||||
# certificate file path.
|
||||
|
||||
## Certificate file path.
|
||||
## +toml2docs:none-default
|
||||
cert_path = ""
|
||||
# private key file path.
|
||||
|
||||
## Private key file path.
|
||||
## +toml2docs:none-default
|
||||
key_path = ""
|
||||
# Watch for Certificate and key file change and auto reload
|
||||
|
||||
## Watch for certificate and key file changes and reload them automatically.
|
||||
watch = false
|
||||
|
||||
# OpenTSDB protocol options.
|
||||
## OpenTSDB protocol options.
|
||||
[opentsdb]
|
||||
# Whether to enable
|
||||
## Whether to enable
|
||||
enable = true
|
||||
# OpenTSDB telnet API server address, "127.0.0.1:4242" by default.
|
||||
## OpenTSDB telnet API server address.
|
||||
addr = "127.0.0.1:4242"
|
||||
# The number of server worker threads, 2 by default.
|
||||
## The number of server worker threads.
|
||||
runtime_size = 2
|
||||
|
||||
# InfluxDB protocol options.
|
||||
## InfluxDB protocol options.
|
||||
[influxdb]
|
||||
# Whether to enable InfluxDB protocol in HTTP API, true by default.
|
||||
## Whether to enable InfluxDB protocol in HTTP API.
|
||||
enable = true
|
||||
|
||||
# Prometheus remote storage options
|
||||
## Prometheus remote storage options
|
||||
[prom_store]
|
||||
# Whether to enable Prometheus remote write and read in HTTP API, true by default.
|
||||
## Whether to enable Prometheus remote write and read in HTTP API.
|
||||
enable = true
|
||||
# Whether to store the data from Prometheus remote write in metric engine.
|
||||
# true by default
|
||||
## Whether to store the data from Prometheus remote write in metric engine.
|
||||
with_metric_engine = true
|
||||
|
||||
## The WAL options.
|
||||
[wal]
|
||||
# Available wal providers:
|
||||
# - "raft_engine" (default)
|
||||
# - "kafka"
|
||||
## The provider of the WAL.
|
||||
## - `raft_engine`: the WAL is stored in the local file system by raft-engine.
|
||||
## - `kafka`: a remote WAL whose data is stored in Kafka.
|
||||
provider = "raft_engine"
|
||||
|
||||
# Raft-engine wal options.
|
||||
# WAL data directory
|
||||
# dir = "/tmp/greptimedb/wal"
|
||||
# WAL file size in bytes.
|
||||
## The directory to store the WAL files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
## +toml2docs:none-default
|
||||
dir = "/tmp/greptimedb/wal"
|
||||
|
||||
## The size of the WAL segment file.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
file_size = "256MB"
|
||||
# WAL purge threshold.
|
||||
|
||||
## The threshold of the WAL size to trigger a purge.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_threshold = "4GB"
|
||||
# WAL purge interval in seconds.
|
||||
|
||||
## The interval to trigger a purge.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
purge_interval = "10m"
|
||||
# WAL read batch size.
|
||||
|
||||
## The read batch size.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
read_batch_size = 128
|
||||
# Whether to sync log file after every write.
|
||||
|
||||
## Whether to use sync write.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_write = false
|
||||
# Whether to reuse logically truncated log files.
|
||||
|
||||
## Whether to reuse logically truncated log files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
enable_log_recycle = true
|
||||
# Whether to pre-create log files on start up
|
||||
|
||||
## Whether to pre-create log files on start up.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
prefill_log_files = false
|
||||
# Duration for fsyncing log files.
|
||||
sync_period = "1000ms"
|
||||
|
||||
# Kafka wal options.
|
||||
# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default.
|
||||
# broker_endpoints = ["127.0.0.1:9092"]
|
||||
## Duration for fsyncing log files.
|
||||
## **It's only used when the provider is `raft_engine`**.
|
||||
sync_period = "10s"
|
||||
|
||||
# Number of topics to be created upon start.
|
||||
# num_topics = 64
|
||||
# Topic selector type.
|
||||
# Available selector types:
|
||||
# - "round_robin" (default)
|
||||
# selector_type = "round_robin"
|
||||
# The prefix of topic name.
|
||||
# topic_name_prefix = "greptimedb_wal_topic"
|
||||
# The number of replicas of each partition.
|
||||
# Warning: the replication factor must be positive and must not be greater than the number of broker endpoints.
|
||||
# replication_factor = 1
|
||||
## The Kafka broker endpoints.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
# The max size of a single producer batch.
|
||||
# Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
# max_batch_size = "1MB"
|
||||
# The linger duration.
|
||||
# linger = "200ms"
|
||||
# The consumer wait timeout.
|
||||
# consumer_wait_timeout = "100ms"
|
||||
# Create topic timeout.
|
||||
# create_topic_timeout = "30s"
|
||||
## The max size of a single producer batch.
|
||||
## Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
max_batch_size = "1MB"
|
||||
|
||||
# The initial backoff delay.
|
||||
# backoff_init = "500ms"
|
||||
# The maximum backoff delay.
|
||||
# backoff_max = "10s"
|
||||
# Exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
# backoff_base = 2
|
||||
# The deadline of retries.
|
||||
# backoff_deadline = "5mins"
|
||||
## The linger duration of a kafka batch producer.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
linger = "200ms"
|
||||
|
||||
# Metadata storage options.
|
||||
## The consumer wait timeout.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
consumer_wait_timeout = "100ms"
|
||||
|
||||
## The initial backoff delay.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_init = "500ms"
|
||||
|
||||
## The maximum backoff delay.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_max = "10s"
|
||||
|
||||
## The exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_base = 2
|
||||
|
||||
## The deadline of retries.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_deadline = "5mins"
|
||||
|
||||
## Metadata storage options.
|
||||
[metadata_store]
|
||||
# Kv file size in bytes.
|
||||
## Kv file size in bytes.
|
||||
file_size = "256MB"
|
||||
# Kv purge threshold.
|
||||
## Kv purge threshold.
|
||||
purge_threshold = "4GB"
|
||||
|
||||
# Procedure storage options.
|
||||
## Procedure storage options.
|
||||
[procedure]
|
||||
# Procedure max retry time.
|
||||
## The max retry times of a procedure.
|
||||
max_retry_times = 3
|
||||
# Initial retry delay of procedures, increases exponentially
|
||||
## The initial retry delay of procedures; it increases exponentially.
|
||||
retry_delay = "500ms"
|
||||
|
||||
# Storage options.
|
||||
# Example of using S3 as the storage.
|
||||
# [storage]
|
||||
# type = "S3"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# access_key_id = "test"
|
||||
# secret_access_key = "123456"
|
||||
# endpoint = "https://s3.amazonaws.com"
|
||||
# region = "us-west-2"
|
||||
|
||||
# Example of using Oss as the storage.
|
||||
# [storage]
|
||||
# type = "Oss"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# access_key_id = "test"
|
||||
# access_key_secret = "123456"
|
||||
# endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
|
||||
|
||||
# Example of using Azblob as the storage.
|
||||
# [storage]
|
||||
# type = "Azblob"
|
||||
# container = "greptimedb"
|
||||
# root = "data"
|
||||
# account_name = "test"
|
||||
# account_key = "123456"
|
||||
# endpoint = "https://greptimedb.blob.core.windows.net"
|
||||
# sas_token = ""
|
||||
|
||||
# Example of using Gcs as the storage.
|
||||
# [storage]
|
||||
# type = "Gcs"
|
||||
# bucket = "greptimedb"
|
||||
# root = "data"
|
||||
# scope = "test"
|
||||
# credential_path = "123456"
|
||||
# endpoint = "https://storage.googleapis.com"
|
||||
|
||||
## The data storage options.
|
||||
[storage]
|
||||
# The working home directory.
|
||||
## The working home directory.
|
||||
data_home = "/tmp/greptimedb/"
|
||||
# Storage type.
|
||||
|
||||
## The storage type used to store the data.
|
||||
## - `File`: the data is stored in the local file system.
|
||||
## - `S3`: the data is stored in the S3 object storage.
|
||||
## - `Gcs`: the data is stored in the Google Cloud Storage.
|
||||
## - `Azblob`: the data is stored in the Azure Blob Storage.
|
||||
## - `Oss`: the data is stored in the Aliyun OSS.
|
||||
type = "File"
|
||||
# TTL for all tables. Disabled by default.
|
||||
# global_ttl = "7d"
|
||||
# Cache configuration for object storage such as 'S3' etc.
|
||||
# cache_path = "/path/local_cache"
|
||||
# The local file cache capacity in bytes.
|
||||
# cache_capacity = "256MB"
|
||||
|
||||
## Cache configuration for object storage such as 'S3' etc.
|
||||
## The local file cache directory.
|
||||
## +toml2docs:none-default
|
||||
cache_path = "/path/local_cache"
|
||||
|
||||
## The local file cache capacity in bytes.
|
||||
## +toml2docs:none-default
|
||||
cache_capacity = "256MB"
|
||||
|
||||
## The S3 bucket name.
|
||||
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
bucket = "greptimedb"
|
||||
|
||||
## The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
|
||||
## **It's only used when the storage type is `S3`, `Oss` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
root = "greptimedb"
|
||||
|
||||
## The access key id of the aws account.
|
||||
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
|
||||
## **It's only used when the storage type is `S3` and `Oss`**.
|
||||
## +toml2docs:none-default
|
||||
access_key_id = "test"
|
||||
|
||||
## The secret access key of the aws account.
|
||||
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
|
||||
## **It's only used when the storage type is `S3`**.
|
||||
## +toml2docs:none-default
|
||||
secret_access_key = "test"
|
||||
|
||||
## The secret access key of the aliyun account.
|
||||
## **It's only used when the storage type is `Oss`**.
|
||||
## +toml2docs:none-default
|
||||
access_key_secret = "test"
|
||||
|
||||
## The account name of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
account_name = "test"
|
||||
|
||||
## The account key of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
account_key = "test"
|
||||
|
||||
## The scope of the google cloud storage.
|
||||
## **It's only used when the storage type is `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
scope = "test"
|
||||
|
||||
## The credential path of the google cloud storage.
|
||||
## **It's only used when the storage type is `Gcs`**.
|
||||
## +toml2docs:none-default
|
||||
credential_path = "test"
|
||||
|
||||
## The container of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
container = "greptimedb"
|
||||
|
||||
## The sas token of the azure account.
|
||||
## **It's only used when the storage type is `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
sas_token = ""
|
||||
|
||||
## The endpoint of the S3 service.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
endpoint = "https://s3.amazonaws.com"
|
||||
|
||||
## The region of the S3 service.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
|
||||
## +toml2docs:none-default
|
||||
region = "us-west-2"
|
||||
|
||||
# Custom storage options
|
||||
#[[storage.providers]]
|
||||
#type = "S3"
|
||||
#[[storage.providers]]
|
||||
#type = "Gcs"
|
||||
# [[storage.providers]]
|
||||
# type = "S3"
|
||||
# [[storage.providers]]
|
||||
# type = "Gcs"
|
||||
|
||||
# Mito engine options
|
||||
## The region engine options. You can configure multiple region engines.
|
||||
[[region_engine]]
|
||||
|
||||
## The Mito engine options.
|
||||
[region_engine.mito]
|
||||
# Number of region workers
|
||||
|
||||
## Number of region workers.
|
||||
num_workers = 8
|
||||
# Request channel size of each worker
|
||||
|
||||
## Request channel size of each worker.
|
||||
worker_channel_size = 128
|
||||
# Max batch size for a worker to handle requests
|
||||
|
||||
## Max batch size for a worker to handle requests.
|
||||
worker_request_batch_size = 64
|
||||
# Number of meta action updated to trigger a new checkpoint for the manifest
|
||||
|
||||
## The number of meta actions to trigger a new checkpoint for the manifest.
|
||||
manifest_checkpoint_distance = 10
|
||||
# Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
|
||||
## Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
compress_manifest = false
|
||||
# Max number of running background jobs
|
||||
|
||||
## Max number of running background jobs
|
||||
max_background_jobs = 4
|
||||
# Interval to auto flush a region if it has not flushed yet.
|
||||
|
||||
## Interval to auto flush a region if it has not flushed yet.
|
||||
auto_flush_interval = "1h"
|
||||
# Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
|
||||
|
||||
## Global write buffer size for all regions. If not set, it defaults to 1/8 of OS memory, with a maximum of 1GB.
|
||||
global_write_buffer_size = "1GB"
|
||||
# Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
|
||||
|
||||
## Global write buffer size threshold to reject write requests. If not set, it defaults to twice `global_write_buffer_size`.
|
||||
global_write_buffer_reject_size = "2GB"
|
||||
# Cache size for SST metadata. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
|
||||
|
||||
## Cache size for SST metadata. Set it to 0 to disable the cache.
|
||||
## If not set, it defaults to 1/32 of OS memory, with a maximum of 128MB.
|
||||
sst_meta_cache_size = "128MB"
|
||||
# Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
|
||||
## Cache size for vectors and arrow arrays. Set it to 0 to disable the cache.
|
||||
## If not set, it defaults to 1/16 of OS memory, with a maximum of 512MB.
|
||||
vector_cache_size = "512MB"
|
||||
# Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
|
||||
## Cache size for pages of SST row groups. Set it to 0 to disable the cache.
|
||||
## If not set, it defaults to 1/16 of OS memory, with a maximum of 512MB.
|
||||
page_cache_size = "512MB"
|
||||
# Buffer size for SST writing.
|
||||
|
||||
## Buffer size for SST writing.
|
||||
sst_write_buffer_size = "8MB"
|
||||
# Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
# - 0: using the default value (1/4 of cpu cores).
|
||||
# - 1: scan in current thread.
|
||||
# - n: scan in parallelism n.
|
||||
|
||||
## Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
## - `0`: using the default value (1/4 of cpu cores).
|
||||
## - `1`: scan in current thread.
|
||||
## - `n`: scan in parallelism n.
|
||||
scan_parallelism = 0
|
||||
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
|
||||
|
||||
## Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
parallel_scan_channel_size = 32
|
||||
# Whether to allow stale WAL entries read during replay.
|
||||
|
||||
## Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
|
||||
## The options for inverted index in Mito engine.
|
||||
[region_engine.mito.inverted_index]
|
||||
# Whether to create the index on flush.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to create the index on flush.
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
create_on_flush = "auto"
|
||||
# Whether to create the index on compaction.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to create the index on compaction.
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
create_on_compaction = "auto"
|
||||
# Whether to apply the index on query
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
|
||||
## Whether to apply the index on query
|
||||
## - `auto`: automatically
|
||||
## - `disable`: never
|
||||
apply_on_query = "auto"
|
||||
# Memory threshold for performing an external sort during index creation.
|
||||
# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
|
||||
## Memory threshold for performing an external sort during index creation.
|
||||
## Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
mem_threshold_on_create = "64M"
|
||||
# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
|
||||
## File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
intermediate_path = ""
|
||||
|
||||
[region_engine.mito.memtable]
|
||||
# Memtable type.
|
||||
# - "partition_tree": partition tree memtable
|
||||
# - "time_series": time-series memtable (deprecated)
|
||||
type = "partition_tree"
|
||||
# The max number of keys in one shard.
|
||||
## Memtable type.
|
||||
## - `time_series`: time-series memtable
|
||||
## - `partition_tree`: partition tree memtable (experimental)
|
||||
type = "time_series"
|
||||
|
||||
## The max number of keys in one shard.
|
||||
## Only available for `partition_tree` memtable.
|
||||
index_max_keys_per_shard = 8192
|
||||
# The max rows of data inside the actively writing buffer in one shard.
|
||||
|
||||
## The max rows of data inside the actively writing buffer in one shard.
|
||||
## Only available for `partition_tree` memtable.
|
||||
data_freeze_threshold = 32768
|
||||
# Max dictionary bytes.
|
||||
|
||||
## Max dictionary bytes.
|
||||
## Only available for `partition_tree` memtable.
|
||||
fork_dictionary_bytes = "1GiB"
|
||||
|
||||
# Log options
|
||||
# [logging]
|
||||
# Specify logs directory.
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# Specify the log level [info | debug | error | warn]
|
||||
# level = "info"
|
||||
# whether enable tracing, default is false
|
||||
# enable_otlp_tracing = false
|
||||
# tracing exporter endpoint with format `ip:port`, we use grpc oltp as exporter, default endpoint is `localhost:4317`
|
||||
# otlp_endpoint = "localhost:4317"
|
||||
# Whether to append logs to stdout. Defaults to true.
|
||||
# append_stdout = true
|
||||
# The percentage of tracing will be sampled and exported. Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1. ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
# [logging.tracing_sample_ratio]
|
||||
# default_ratio = 0.0
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files.
|
||||
dir = "/tmp/greptimedb/logs"
|
||||
|
||||
# Standalone export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# for `standalone`, `self_import` is recommend to collect metrics generated by itself
|
||||
# [export_metrics.self_import]
|
||||
# db = "information_schema"
|
||||
## The log level. Can be `info`/`debug`/`warn`/`error`.
|
||||
## +toml2docs:none-default
|
||||
level = "info"
|
||||
|
||||
## Enable OTLP tracing.
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
## +toml2docs:none-default
|
||||
otlp_endpoint = ""
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
|
||||
## The percentage of tracing that will be sampled and exported.
|
||||
## Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.
|
||||
## Ratios > 1 are treated as 1, and ratios < 0 are treated as 0.
|
||||
[logging.tracing_sample_ratio]
|
||||
default_ratio = 1.0
|
||||
|
||||
## The datanode can export its metrics and send them to a Prometheus-compatible service (e.g. `greptimedb` itself) via the remote-write API.
|
||||
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
[export_metrics]
|
||||
|
||||
## Whether to enable exporting metrics.
|
||||
enable = false
|
||||
|
||||
## The interval of export metrics.
|
||||
write_interval = "30s"
|
||||
|
||||
## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself.
|
||||
[export_metrics.self_import]
|
||||
## +toml2docs:none-default
|
||||
db = "information_schema"
|
||||
|
||||
[export_metrics.remote_write]
|
||||
## The URL to send the metrics to. For example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`.
|
||||
url = ""
|
||||
|
||||
## The HTTP headers carried by the Prometheus remote-write requests.
|
||||
headers = { }
|
||||
|
||||
136
docs/how-to/how-to-write-fuzz-tests.md
Normal file
@@ -0,0 +1,136 @@
|
||||
# How to write fuzz tests
|
||||
|
||||
This document introduces how to write fuzz tests in GreptimeDB.
|
||||
|
||||
## What is a fuzz test
|
||||
Fuzz testing is a technique that leverages deterministic random generation to assist in finding bugs. The goal of fuzz tests is to identify fuzzer-generated inputs that cause system panics, crashes, or other unexpected behaviors. We use [cargo-fuzz](https://github.com/rust-fuzz/cargo-fuzz) to run our fuzz test targets.
|
||||
|
||||
## Why we need them
|
||||
- Find bugs by leveraging random generation
|
||||
- Integrate with other tests (e.g., e2e)
|
||||
|
||||
## Resources
|
||||
All fuzz test-related resources are located in the `/tests-fuzz` directory.
|
||||
There are two types of resources: (1) fundamental components and (2) test targets.
|
||||
|
||||
### Fundamental components
|
||||
They are located in the `/tests-fuzz/src` directory. The fundamental components define how to generate SQLs (including dialects for different protocols) and validate execution results (e.g., column attribute validation), etc.
|
||||
|
||||
### Test targets
|
||||
They are located in the `/tests-fuzz/targets` directory, with each file representing an independent fuzz test case. The target utilizes fundamental components to generate SQLs, sends the generated SQLs via specified protocol, and validates the results of SQL execution.
|
||||
|
||||
Figure 1 illustrates how the fundamental components of the fuzz tests provide the ability to generate random SQLs. They utilize a Random Number Generator (Rng) to generate the Intermediate Representation (IR), then employ a DialectTranslator to produce the specified dialect for each protocol. Finally, the fuzz tests send the generated SQL via the specified protocol and verify that the execution results meet expectations.
|
||||
```
|
||||
Rng
|
||||
|
|
||||
|
|
||||
v
|
||||
ExprGenerator
|
||||
|
|
||||
|
|
||||
v
|
||||
Intermediate representation (IR)
|
||||
|
|
||||
|
|
||||
+----------------------+----------------------+
|
||||
| | |
|
||||
v v v
|
||||
MySQLTranslator PostgreSQLTranslator OtherDialectTranslator
|
||||
| | |
|
||||
| | |
|
||||
v v v
|
||||
SQL(MySQL Dialect) ..... .....
|
||||
|
|
||||
|
|
||||
v
|
||||
Fuzz Test
|
||||
|
||||
```
|
||||
(Figure1: Overview of fuzz tests)
|
||||
|
||||
For more details about fuzz targets and fundamental components, please refer to this [tracking issue](https://github.com/GreptimeTeam/greptimedb/issues/3174).
|
||||
|
||||
## How to add a fuzz test target
|
||||
|
||||
1. Create an empty Rust source file at `/tests-fuzz/targets/<fuzz-target>.rs`.
|
||||
|
||||
2. Register the fuzz test target in the `/tests-fuzz/Cargo.toml` file.
|
||||
|
||||
```toml
|
||||
[[bin]]
|
||||
name = "<fuzz-target>"
|
||||
path = "targets/<fuzz-target>.rs"
|
||||
test = false
|
||||
bench = false
|
||||
doc = false
|
||||
```
|
||||
|
||||
3. Define the `FuzzInput` in the `/tests-fuzz/targets/<fuzz-target>.rs`.
|
||||
|
||||
```rust
|
||||
#![no_main]
|
||||
use libfuzzer_sys::arbitrary::{self, Arbitrary, Unstructured};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct FuzzInput {
|
||||
seed: u64,
|
||||
}
|
||||
|
||||
impl Arbitrary<'_> for FuzzInput {
|
||||
fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result<Self> {
|
||||
let seed = u.int_in_range(u64::MIN..=u64::MAX)?;
|
||||
Ok(FuzzInput { seed })
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
4. Write your first fuzz test target in the `/tests-fuzz/targets/<fuzz-target>.rs`.
|
||||
|
||||
```rust
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use rand_chacha::ChaChaRng;
|
||||
use snafu::ResultExt;
|
||||
use sqlx::{MySql, Pool};
|
||||
use tests_fuzz::fake::{
|
||||
merge_two_word_map_fn, random_capitalize_map, uppercase_and_keyword_backtick_map,
|
||||
MappedGenerator, WordGenerator,
|
||||
};
|
||||
use tests_fuzz::generator::create_expr::CreateTableExprGeneratorBuilder;
|
||||
use tests_fuzz::generator::Generator;
|
||||
use tests_fuzz::ir::CreateTableExpr;
|
||||
use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator;
|
||||
use tests_fuzz::translator::DslTranslator;
|
||||
use tests_fuzz::utils::{init_greptime_connections, Connections};
|
||||
|
||||
fuzz_target!(|input: FuzzInput| {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
common_runtime::block_on_write(async {
|
||||
let Connections { mysql } = init_greptime_connections().await;
|
||||
let mut rng = ChaChaRng::seed_from_u64(input.seed);
|
||||
let columns = rng.gen_range(2..30);
// Randomly decide whether the generated statement uses IF NOT EXISTS.
let if_not_exists = rng.gen_bool(0.5);
|
||||
let create_table_generator = CreateTableExprGeneratorBuilder::default()
|
||||
.name_generator(Box::new(MappedGenerator::new(
|
||||
WordGenerator,
|
||||
merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map),
|
||||
)))
|
||||
.columns(columns)
|
||||
.engine("mito")
|
||||
.if_not_exists(if_not_exists)
|
||||
.build()
|
||||
.unwrap();
|
||||
let ir = create_table_generator.generate(&mut rng).unwrap();
|
||||
let translator = CreateTableExprTranslator;
|
||||
let sql = translator.translate(&ir).unwrap();
|
||||
sqlx::query(&sql).execute(&mysql).await.unwrap();
|
||||
})
|
||||
});
|
||||
```
|
||||
|
||||
5. Run your fuzz test target
|
||||
|
||||
```bash
|
||||
cargo fuzz run <fuzz-target> --fuzz-dir tests-fuzz
|
||||
```
|
||||
|
||||
For more details, please refer to this [document](/tests-fuzz/README.md).
|
||||
@@ -27,8 +27,8 @@ subgraph Frontend["Frontend"]
|
||||
end
|
||||
end
|
||||
|
||||
MyTable --> MetaSrv
|
||||
MetaSrv --> ETCD
|
||||
MyTable --> Metasrv
|
||||
Metasrv --> ETCD
|
||||
|
||||
MyTable-->TableEngine0
|
||||
MyTable-->TableEngine1
|
||||
@@ -95,8 +95,8 @@ subgraph Frontend["Frontend"]
|
||||
end
|
||||
end
|
||||
|
||||
MyTable --> MetaSrv
|
||||
MetaSrv --> ETCD
|
||||
MyTable --> Metasrv
|
||||
Metasrv --> ETCD
|
||||
|
||||
MyTable-->RegionEngine
|
||||
MyTable-->RegionEngine1
|
||||
|
||||
@@ -36,7 +36,7 @@ Hence, we choose the third option, and use a simple logical plan that's anagonis
|
||||
## Deploy mode and protocol
|
||||
- Greptime Flow is an independent streaming compute component. It can be used either within a standalone node or as a dedicated node at the same level as frontend in distributed mode.
|
||||
- It accepts insert requests as Rows, the format used between frontend and datanode.
|
||||
- New flow job is submitted in the format of modified SQL query like snowflake do, like: `CREATE TASK avg_over_5m WINDOW_SIZE = "5m" AS SELECT avg(value) FROM table WHERE time > now() - 5m GROUP BY time(1m)`. Flow job then got stored in MetaSrv.
|
||||
- A new flow job is submitted as a modified SQL query, similar to what Snowflake does, e.g.: `CREATE TASK avg_over_5m WINDOW_SIZE = "5m" AS SELECT avg(value) FROM table WHERE time > now() - 5m GROUP BY time(1m)`. The flow job is then stored in Metasrv.
|
||||
- It also persists results to the frontend in the Rows format.
|
||||
- The query plan uses Substrait as codec format. It's the same with GreptimeDB's query engine.
|
||||
- Greptime Flow needs a WAL for recovery. It's possible to reuse the datanode's.
|
||||
|
||||
@@ -73,7 +73,7 @@ CREATE TABLE cpu (
|
||||
usage_system DOUBLE,
|
||||
datacenter STRING,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY(datacenter, host)) ENGINE=mito WITH(regions=1);
|
||||
PRIMARY KEY(datacenter, host)) ENGINE=mito;
|
||||
```
|
||||
|
||||
Then the table's `TableMeta` may look like this:
|
||||
@@ -249,7 +249,7 @@ CREATE TABLE cpu (
|
||||
usage_system DOUBLE,
|
||||
datacenter STRING,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY(datacenter, host)) ENGINE=mito WITH(regions=1);
|
||||
PRIMARY KEY(datacenter, host)) ENGINE=mito;
|
||||
|
||||
select ts, usage_system from cpu;
|
||||
```
|
||||
|
||||
46
docs/style-guide.md
Normal file
@@ -0,0 +1,46 @@
|
||||
# GreptimeDB Style Guide
|
||||
|
||||
This style guide is intended to help contributors to GreptimeDB write code that is consistent with the rest of the codebase. It is a living document and will be updated as the codebase evolves.
|
||||
|
||||
It's mainly a complement to the [Rust Style Guide](https://pingcap.github.io/style-guide/rust/).
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- Formatting
|
||||
- Modules
|
||||
- Comments
|
||||
|
||||
## Formatting
|
||||
|
||||
- Place all `mod` declarations before any `use`.
|
||||
- Use `unimplemented!()` instead of `todo!()` for things that aren't likely to be implemented.
|
||||
- Add an empty line before and after declaration blocks.
|
||||
- Place comments before attributes (`#[...]`) and derives (`#[derive]`), as in the sketch below.
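
A minimal sketch putting these formatting rules together; the module and type names below are placeholders rather than real GreptimeDB code:

```rust
// All `mod` declarations come before any `use`.
mod cache;
mod options;

use std::sync::Arc;

use crate::cache::Cache;

/// A placeholder worker, only here to show that the doc comment goes
/// before the `#[derive(...)]` attribute.
#[derive(Debug, Clone)]
pub struct Worker {
    cache: Arc<Cache>,
}

impl Worker {
    /// This operation is not planned to be supported, so `unimplemented!()`
    /// is preferred over `todo!()`.
    pub fn rollback(&self) {
        unimplemented!()
    }
}
```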
|
||||
|
||||
## Modules
|
||||
|
||||
- Use a file with the same name instead of `mod.rs` to define a module. E.g.:
|
||||
|
||||
```
|
||||
.
|
||||
├── cache
|
||||
│ ├── cache_size.rs
|
||||
│ └── write_cache.rs
|
||||
└── cache.rs
|
||||
```
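
With that layout, `cache.rs` itself declares the submodules; the re-exported item names below are only illustrative:

```rust
// cache.rs: declares the submodules listed in the tree above.
mod cache_size;
mod write_cache;

// Re-export the items the rest of the crate needs (illustrative names).
pub use cache_size::CacheSize;
pub use write_cache::WriteCache;
```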
|
||||
|
||||
## Comments
|
||||
|
||||
- Add comments for public functions and structs.
|
||||
- Prefer document comments (`///`) over normal comments (`//`) for structs, fields, functions, etc.
|
||||
- Add links (`[]`) to structs, methods, or any other references, and make sure the links work. For example:
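
A short sketch of these comment rules, using a made-up cache type so the intra-doc links resolve:

```rust
use std::collections::HashMap;

/// An in-memory cache used only to illustrate doc comments and links.
pub struct Cache {
    entries: HashMap<String, Vec<u8>>,
}

impl Cache {
    /// Returns the value stored for `key`, if any. See also [`Cache::evict`].
    pub fn get(&self, key: &str) -> Option<&Vec<u8>> {
        self.entries.get(key)
    }

    /// Removes the entry for `key` from the [`Cache`] and returns it, if any.
    pub fn evict(&mut self, key: &str) -> Option<Vec<u8>> {
        self.entries.remove(key)
    }
}
```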
|
||||
|
||||
## Error handling
|
||||
|
||||
- Define a custom error type for the module if needed.
|
||||
- Prefer `with_context()` over `context()` when an allocation is needed to construct an error; see the sketch below.
|
||||
- Use `error!()` or `warn!()` macros in the `common_telemetry` crate to log errors. E.g.:
|
||||
|
||||
```rust
|
||||
error!(e; "Failed to do something");
|
||||
```
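
The `with_context()` preference exists because `context()` evaluates its context eagerly, while `with_context()` builds it lazily on the error path. A minimal sketch with a made-up snafu error type (not a real GreptimeDB error):

```rust
use snafu::{ResultExt, Snafu};

#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Failed to read table {table_name}"))]
    ReadTable {
        table_name: String,
        source: std::io::Error,
    },
}

fn read_table_file(path: &str, table_name: &str) -> Result<Vec<u8>, Error> {
    // The closure (and its `to_string()` allocation) only runs if the read fails.
    std::fs::read(path).with_context(|_| ReadTableSnafu {
        table_name: table_name.to_string(),
    })
}
```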
|
||||
@@ -1,2 +1,2 @@
|
||||
[toolchain]
|
||||
channel = "nightly-2023-12-19"
|
||||
channel = "nightly-2024-04-18"
|
||||
|
||||
@@ -21,6 +21,7 @@ pub mod prom_store {
|
||||
}
|
||||
}
|
||||
|
||||
pub mod region;
|
||||
pub mod v1;
|
||||
|
||||
pub use greptime_proto;
|
||||
|
||||
42
src/api/src/region.rs
Normal file
@@ -0,0 +1,42 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use common_base::AffectedRows;
|
||||
use greptime_proto::v1::region::RegionResponse as RegionResponseV1;
|
||||
|
||||
/// This result struct is derived from [RegionResponseV1]
|
||||
#[derive(Debug)]
|
||||
pub struct RegionResponse {
|
||||
pub affected_rows: AffectedRows,
|
||||
pub extension: HashMap<String, Vec<u8>>,
|
||||
}
|
||||
|
||||
impl RegionResponse {
|
||||
pub fn from_region_response(region_response: RegionResponseV1) -> Self {
|
||||
Self {
|
||||
affected_rows: region_response.affected_rows as _,
|
||||
extension: region_response.extension,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates one response without extension
|
||||
pub fn new(affected_rows: AffectedRows) -> Self {
|
||||
Self {
|
||||
affected_rows,
|
||||
extension: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -45,9 +45,9 @@ impl Default for MockUserProvider {
|
||||
|
||||
impl MockUserProvider {
|
||||
pub fn set_authorization_info(&mut self, info: DatabaseAuthInfo) {
|
||||
self.catalog = info.catalog.to_owned();
|
||||
self.schema = info.schema.to_owned();
|
||||
self.username = info.username.to_owned();
|
||||
info.catalog.clone_into(&mut self.catalog);
|
||||
info.schema.clone_into(&mut self.schema);
|
||||
info.username.clone_into(&mut self.username);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -216,7 +216,7 @@ pub enum Error {
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to perform metasrv operation"))]
|
||||
MetaSrv {
|
||||
Metasrv {
|
||||
location: Location,
|
||||
source: meta_client::error::Error,
|
||||
},
|
||||
@@ -304,7 +304,7 @@ impl ErrorExt for Error {
|
||||
| Error::CreateTable { source, .. }
|
||||
| Error::TableSchemaMismatch { source, .. } => source.status_code(),
|
||||
|
||||
Error::MetaSrv { source, .. } => source.status_code(),
|
||||
Error::Metasrv { source, .. } => source.status_code(),
|
||||
Error::SystemCatalogTableScan { source, .. } => source.status_code(),
|
||||
Error::SystemCatalogTableScanExec { source, .. } => source.status_code(),
|
||||
Error::InvalidTableInfoInCatalog { source, .. } => source.status_code(),
|
||||
|
||||
@@ -20,6 +20,7 @@ mod predicate;
|
||||
mod region_peers;
|
||||
mod runtime_metrics;
|
||||
pub mod schemata;
|
||||
mod table_constraints;
|
||||
mod table_names;
|
||||
pub mod tables;
|
||||
|
||||
@@ -52,6 +53,7 @@ use crate::information_schema::partitions::InformationSchemaPartitions;
|
||||
use crate::information_schema::region_peers::InformationSchemaRegionPeers;
|
||||
use crate::information_schema::runtime_metrics::InformationSchemaMetrics;
|
||||
use crate::information_schema::schemata::InformationSchemaSchemata;
|
||||
use crate::information_schema::table_constraints::InformationSchemaTableConstraints;
|
||||
use crate::information_schema::tables::InformationSchemaTables;
|
||||
use crate::CatalogManager;
|
||||
|
||||
@@ -173,6 +175,10 @@ impl InformationSchemaProvider {
|
||||
KEY_COLUMN_USAGE.to_string(),
|
||||
self.build_table(KEY_COLUMN_USAGE).unwrap(),
|
||||
);
|
||||
tables.insert(
|
||||
TABLE_CONSTRAINTS.to_string(),
|
||||
self.build_table(TABLE_CONSTRAINTS).unwrap(),
|
||||
);
|
||||
|
||||
// Add memory tables
|
||||
for name in MEMORY_TABLES.iter() {
|
||||
@@ -241,6 +247,10 @@ impl InformationSchemaProvider {
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)) as _),
|
||||
TABLE_CONSTRAINTS => Some(Arc::new(InformationSchemaTableConstraints::new(
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)) as _),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -274,8 +274,8 @@ impl InformationSchemaColumnsBuilder {
|
||||
};
|
||||
|
||||
self.add_column(
|
||||
idx,
|
||||
&predicates,
|
||||
idx,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table.table_info().name,
|
||||
@@ -292,8 +292,8 @@ impl InformationSchemaColumnsBuilder {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn add_column(
|
||||
&mut self,
|
||||
index: usize,
|
||||
predicates: &Predicates,
|
||||
index: usize,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
table_name: &str,
|
||||
|
||||
@@ -49,6 +49,11 @@ pub const COLUMN_NAME: &str = "column_name";
|
||||
pub const ORDINAL_POSITION: &str = "ordinal_position";
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
/// Primary key constraint name
|
||||
pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY";
|
||||
/// Time index constraint name
|
||||
pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX";
|
||||
|
||||
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
|
||||
pub(super) struct InformationSchemaKeyColumnUsage {
|
||||
schema: SchemaRef,
|
||||
@@ -232,7 +237,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
"TIME INDEX",
|
||||
TIME_INDEX_CONSTRAINT_NAME,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
@@ -262,7 +267,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
"PRIMARY",
|
||||
PRI_CONSTRAINT_NAME,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
|
||||
@@ -109,11 +109,7 @@ impl Predicate {
|
||||
};
|
||||
}
|
||||
Predicate::Not(p) => {
|
||||
let Some(b) = p.eval(row) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
return Some(!b);
|
||||
return Some(!p.eval(row)?);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,13 +121,7 @@ impl Predicate {
|
||||
fn from_expr(expr: DfExpr) -> Option<Predicate> {
|
||||
match expr {
|
||||
// NOT expr
|
||||
DfExpr::Not(expr) => {
|
||||
let Some(p) = Self::from_expr(*expr) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some(Predicate::Not(Box::new(p)))
|
||||
}
|
||||
DfExpr::Not(expr) => Some(Predicate::Not(Box::new(Self::from_expr(*expr)?))),
|
||||
// expr LIKE pattern
|
||||
DfExpr::Like(Like {
|
||||
negated,
|
||||
@@ -178,25 +168,15 @@ impl Predicate {
|
||||
}
|
||||
// left AND right
|
||||
(left, Operator::And, right) => {
|
||||
let Some(left) = Self::from_expr(left) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let Some(right) = Self::from_expr(right) else {
|
||||
return None;
|
||||
};
|
||||
let left = Self::from_expr(left)?;
|
||||
let right = Self::from_expr(right)?;
|
||||
|
||||
Some(Predicate::And(Box::new(left), Box::new(right)))
|
||||
}
|
||||
// left OR right
|
||||
(left, Operator::Or, right) => {
|
||||
let Some(left) = Self::from_expr(left) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let Some(right) = Self::from_expr(right) else {
|
||||
return None;
|
||||
};
|
||||
let left = Self::from_expr(left)?;
|
||||
let right = Self::from_expr(right)?;
|
||||
|
||||
Some(Predicate::Or(Box::new(left), Box::new(right)))
|
||||
}
|
||||
|
||||
286
src/catalog/src/information_schema/table_constraints.rs
Normal file
@@ -0,0 +1,286 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_TABLE_CONSTRAINTS_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, MutableVector};
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, VectorRef};
|
||||
use futures::TryStreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::{InformationTable, TABLE_CONSTRAINTS};
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::key_column_usage::{
|
||||
PRI_CONSTRAINT_NAME, TIME_INDEX_CONSTRAINT_NAME,
|
||||
};
|
||||
use crate::information_schema::Predicates;
|
||||
use crate::CatalogManager;
|
||||
|
||||
/// The `TABLE_CONSTRAINTS` table describes which tables have constraints.
|
||||
pub(super) struct InformationSchemaTableConstraints {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
}
|
||||
|
||||
const CONSTRAINT_CATALOG: &str = "constraint_catalog";
|
||||
const CONSTRAINT_SCHEMA: &str = "constraint_schema";
|
||||
const CONSTRAINT_NAME: &str = "constraint_name";
|
||||
const TABLE_SCHEMA: &str = "table_schema";
|
||||
const TABLE_NAME: &str = "table_name";
|
||||
const CONSTRAINT_TYPE: &str = "constraint_type";
|
||||
const ENFORCED: &str = "enforced";
|
||||
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
const TIME_INDEX_CONSTRAINT_TYPE: &str = "TIME INDEX";
|
||||
const PRI_KEY_CONSTRAINT_TYPE: &str = "PRIMARY KEY";
|
||||
|
||||
impl InformationSchemaTableConstraints {
|
||||
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
}
|
||||
}
|
||||
|
||||
fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(
|
||||
CONSTRAINT_CATALOG,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
CONSTRAINT_SCHEMA,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(CONSTRAINT_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(CONSTRAINT_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(ENFORCED, ConcreteDataType::string_datatype(), false),
|
||||
]))
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaTableConstraintsBuilder {
|
||||
InformationSchemaTableConstraintsBuilder::new(
|
||||
self.schema.clone(),
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for InformationSchemaTableConstraints {
|
||||
fn table_id(&self) -> TableId {
|
||||
INFORMATION_SCHEMA_TABLE_CONSTRAINTS_TABLE_ID
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
TABLE_CONSTRAINTS
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_table_constraints(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
struct InformationSchemaTableConstraintsBuilder {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
|
||||
constraint_schemas: StringVectorBuilder,
|
||||
constraint_names: StringVectorBuilder,
|
||||
table_schemas: StringVectorBuilder,
|
||||
table_names: StringVectorBuilder,
|
||||
constraint_types: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaTableConstraintsBuilder {
|
||||
fn new(
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
constraint_schemas: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
constraint_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_schemas: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
constraint_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.table_constraints` virtual table
|
||||
async fn make_table_constraints(
|
||||
&mut self,
|
||||
request: Option<ScanRequest>,
|
||||
) -> Result<RecordBatch> {
|
||||
let catalog_name = self.catalog_name.clone();
|
||||
let catalog_manager = self
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name).await;
|
||||
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let keys = &table.table_info().meta.primary_key_indices;
|
||||
let schema = table.schema();
|
||||
|
||||
if schema.timestamp_index().is_some() {
|
||||
self.add_table_constraint(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
TIME_INDEX_CONSTRAINT_NAME,
|
||||
&schema_name,
|
||||
&table.table_info().name,
|
||||
TIME_INDEX_CONSTRAINT_TYPE,
|
||||
);
|
||||
}
|
||||
|
||||
if !keys.is_empty() {
|
||||
self.add_table_constraint(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
PRI_CONSTRAINT_NAME,
|
||||
&schema_name,
|
||||
&table.table_info().name,
|
||||
PRI_KEY_CONSTRAINT_TYPE,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
fn add_table_constraint(
|
||||
&mut self,
|
||||
predicates: &Predicates,
|
||||
constraint_schema: &str,
|
||||
constraint_name: &str,
|
||||
table_schema: &str,
|
||||
table_name: &str,
|
||||
constraint_type: &str,
|
||||
) {
|
||||
let row = [
|
||||
(CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
|
||||
(CONSTRAINT_NAME, &Value::from(constraint_name)),
|
||||
(TABLE_SCHEMA, &Value::from(table_schema)),
|
||||
(TABLE_NAME, &Value::from(table_name)),
|
||||
(CONSTRAINT_TYPE, &Value::from(constraint_type)),
|
||||
];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.constraint_schemas.push(Some(constraint_schema));
|
||||
self.constraint_names.push(Some(constraint_name));
|
||||
self.table_schemas.push(Some(table_schema));
|
||||
self.table_names.push(Some(table_name));
|
||||
self.constraint_types.push(Some(constraint_type));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let rows_num = self.constraint_names.len();
|
||||
|
||||
let constraint_catalogs = Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from(vec!["def"])),
|
||||
rows_num,
|
||||
));
|
||||
let enforceds = Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from(vec!["YES"])),
|
||||
rows_num,
|
||||
));
|
||||
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
constraint_catalogs,
|
||||
Arc::new(self.constraint_schemas.finish()),
|
||||
Arc::new(self.constraint_names.finish()),
|
||||
Arc::new(self.table_schemas.finish()),
|
||||
Arc::new(self.table_names.finish()),
|
||||
Arc::new(self.constraint_types.finish()),
|
||||
enforceds,
|
||||
];
|
||||
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl DfPartitionStream for InformationSchemaTableConstraints {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_table_constraints(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
@@ -41,3 +41,4 @@ pub const SESSION_STATUS: &str = "session_status";
|
||||
pub const RUNTIME_METRICS: &str = "runtime_metrics";
|
||||
pub const PARTITIONS: &str = "partitions";
|
||||
pub const REGION_PEERS: &str = "greptime_region_peers";
|
||||
pub const TABLE_CONSTRAINTS: &str = "table_constraints";
|
||||
|
||||
@@ -17,7 +17,6 @@ use std::fmt::Debug;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Duration;
|
||||
use std::usize;
|
||||
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cache_invalidator::KvCacheInvalidator;
|
||||
@@ -506,32 +505,32 @@ mod tests {
|
||||
}
|
||||
|
||||
async fn range(&self, _req: RangeRequest) -> Result<RangeResponse, Self::Error> {
|
||||
todo!()
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn batch_put(&self, _req: BatchPutRequest) -> Result<BatchPutResponse, Self::Error> {
|
||||
todo!()
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn compare_and_put(
|
||||
&self,
|
||||
_req: CompareAndPutRequest,
|
||||
) -> Result<CompareAndPutResponse, Self::Error> {
|
||||
todo!()
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn delete_range(
|
||||
&self,
|
||||
_req: DeleteRangeRequest,
|
||||
) -> Result<DeleteRangeResponse, Self::Error> {
|
||||
todo!()
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn batch_delete(
|
||||
&self,
|
||||
_req: BatchDeleteRequest,
|
||||
) -> Result<BatchDeleteResponse, Self::Error> {
|
||||
todo!()
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -49,10 +49,7 @@ impl DfTableSourceProvider {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn resolve_table_ref<'a>(
|
||||
&'a self,
|
||||
table_ref: TableReference<'a>,
|
||||
) -> Result<ResolvedTableReference<'a>> {
|
||||
pub fn resolve_table_ref(&self, table_ref: TableReference) -> Result<ResolvedTableReference> {
|
||||
if self.disallow_cross_catalog_query {
|
||||
match &table_ref {
|
||||
TableReference::Bare { .. } => (),
|
||||
@@ -76,7 +73,7 @@ impl DfTableSourceProvider {
|
||||
|
||||
pub async fn resolve_table(
|
||||
&mut self,
|
||||
table_ref: TableReference<'_>,
|
||||
table_ref: TableReference,
|
||||
) -> Result<Arc<dyn TableSource>> {
|
||||
let table_ref = self.resolve_table_ref(table_ref)?;
|
||||
|
||||
@@ -106,8 +103,6 @@ impl DfTableSourceProvider {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::borrow::Cow;
|
||||
|
||||
use session::context::QueryContext;
|
||||
|
||||
use super::*;
|
||||
@@ -120,68 +115,37 @@ mod tests {
|
||||
let table_provider =
|
||||
DfTableSourceProvider::new(MemoryCatalogManager::with_default_setup(), true, query_ctx);
|
||||
|
||||
let table_ref = TableReference::Bare {
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let table_ref = TableReference::bare("table_name");
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Partial {
|
||||
schema: Cow::Borrowed("public"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let table_ref = TableReference::partial("public", "table_name");
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Partial {
|
||||
schema: Cow::Borrowed("wrong_schema"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let table_ref = TableReference::partial("wrong_schema", "table_name");
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("greptime"),
|
||||
schema: Cow::Borrowed("public"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let table_ref = TableReference::full("greptime", "public", "table_name");
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("wrong_catalog"),
|
||||
schema: Cow::Borrowed("public"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let table_ref = TableReference::full("wrong_catalog", "public", "table_name");
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_err());
|
||||
|
||||
let table_ref = TableReference::Partial {
|
||||
schema: Cow::Borrowed("information_schema"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
let table_ref = TableReference::partial("information_schema", "columns");
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("greptime"),
|
||||
schema: Cow::Borrowed("information_schema"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
let table_ref = TableReference::full("greptime", "information_schema", "columns");
|
||||
assert!(table_provider.resolve_table_ref(table_ref).is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("dummy"),
|
||||
schema: Cow::Borrowed("information_schema"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
let table_ref = TableReference::full("dummy", "information_schema", "columns");
|
||||
assert!(table_provider.resolve_table_ref(table_ref).is_err());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("greptime"),
|
||||
schema: Cow::Borrowed("greptime_private"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
let table_ref = TableReference::full("greptime", "greptime_private", "columns");
|
||||
assert!(table_provider.resolve_table_ref(table_ref).is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,6 +37,8 @@ use snafu::{ensure, ResultExt};
|
||||
use crate::error::{ConvertFlightDataSnafu, Error, IllegalFlightMessagesSnafu, ServerSnafu};
|
||||
use crate::{error, from_grpc_response, metrics, Client, Result, StreamInserter};
|
||||
|
||||
pub const DEFAULT_LOOKBACK_STRING: &str = "5m";
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct Database {
|
||||
// The "catalog" and "schema" to be used in processing the requests at the server side.
|
||||
@@ -215,6 +217,7 @@ impl Database {
|
||||
start: start.to_string(),
|
||||
end: end.to_string(),
|
||||
step: step.to_string(),
|
||||
lookback: DEFAULT_LOOKBACK_STRING.to_string(),
|
||||
})),
|
||||
}))
|
||||
.await
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::region::RegionResponse;
|
||||
use api::v1::region::{QueryRequest, RegionRequest};
|
||||
use api::v1::ResponseHeader;
|
||||
use arc_swap::ArcSwapOption;
|
||||
@@ -23,7 +24,7 @@ use async_trait::async_trait;
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_grpc::flight::{FlightDecoder, FlightMessage};
|
||||
use common_meta::datanode_manager::{Datanode, HandleResponse};
|
||||
use common_meta::datanode_manager::Datanode;
|
||||
use common_meta::error::{self as meta_error, Result as MetaResult};
|
||||
use common_recordbatch::error::ExternalSnafu;
|
||||
use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
|
||||
@@ -46,7 +47,7 @@ pub struct RegionRequester {
|
||||
|
||||
#[async_trait]
|
||||
impl Datanode for RegionRequester {
|
||||
async fn handle(&self, request: RegionRequest) -> MetaResult<HandleResponse> {
|
||||
async fn handle(&self, request: RegionRequest) -> MetaResult<RegionResponse> {
|
||||
self.handle_inner(request).await.map_err(|err| {
|
||||
if err.should_retry() {
|
||||
meta_error::Error::RetryLater {
|
||||
@@ -165,7 +166,7 @@ impl RegionRequester {
|
||||
Ok(Box::pin(record_batch_stream))
|
||||
}
|
||||
|
||||
async fn handle_inner(&self, request: RegionRequest) -> Result<HandleResponse> {
|
||||
async fn handle_inner(&self, request: RegionRequest) -> Result<RegionResponse> {
|
||||
let request_type = request
|
||||
.body
|
||||
.as_ref()
|
||||
@@ -194,10 +195,10 @@ impl RegionRequester {
|
||||
|
||||
check_response_header(&response.header)?;
|
||||
|
||||
Ok(HandleResponse::from_region_response(response))
|
||||
Ok(RegionResponse::from_region_response(response))
|
||||
}
|
||||
|
||||
pub async fn handle(&self, request: RegionRequest) -> Result<HandleResponse> {
|
||||
pub async fn handle(&self, request: RegionRequest) -> Result<RegionResponse> {
|
||||
self.handle_inner(request).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,6 +36,7 @@ common-telemetry = { workspace = true, features = [
|
||||
"deadlock_detection",
|
||||
] }
|
||||
common-time.workspace = true
|
||||
common-version.workspace = true
|
||||
common-wal.workspace = true
|
||||
config = "0.13"
|
||||
datanode.workspace = true
|
||||
@@ -76,6 +77,7 @@ tikv-jemallocator = "0.5"
|
||||
common-test-util.workspace = true
|
||||
serde.workspace = true
|
||||
temp-env = "0.3"
|
||||
tempfile.workspace = true
|
||||
|
||||
[target.'cfg(not(windows))'.dev-dependencies]
|
||||
rexpect = "0.5"
|
||||
|
||||
@@ -22,6 +22,7 @@ use cmd::options::{CliOptions, Options};
|
||||
use cmd::{
|
||||
cli, datanode, frontend, greptimedb_cli, log_versions, metasrv, standalone, start_app, App,
|
||||
};
|
||||
use common_version::{short_version, version};
|
||||
|
||||
#[derive(Parser)]
|
||||
enum SubCommand {
|
||||
@@ -105,7 +106,8 @@ async fn main() -> Result<()> {
|
||||
|
||||
common_telemetry::set_panic_hook();
|
||||
|
||||
let cli = greptimedb_cli();
|
||||
let version = version!();
|
||||
let cli = greptimedb_cli().version(version);
|
||||
|
||||
let cli = SubCommand::augment_subcommands(cli);
|
||||
|
||||
@@ -129,7 +131,7 @@ async fn main() -> Result<()> {
|
||||
opts.node_id(),
|
||||
);
|
||||
|
||||
log_versions();
|
||||
log_versions(version, short_version!());
|
||||
|
||||
let app = subcmd.build(opts).await?;
|
||||
|
||||
|
||||
@@ -84,10 +84,10 @@ impl Command {
|
||||
let mut logging_opts = LoggingOptions::default();
|
||||
|
||||
if let Some(dir) = &cli_options.log_dir {
|
||||
logging_opts.dir = dir.clone();
|
||||
logging_opts.dir.clone_from(dir);
|
||||
}
|
||||
|
||||
logging_opts.level = cli_options.log_level.clone();
|
||||
logging_opts.level.clone_from(&cli_options.log_level);
|
||||
|
||||
Ok(Options::Cli(Box::new(logging_opts)))
|
||||
}
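
The `clone_from` rewrites in this and the following hunks all follow the same standard-library pattern: `Clone::clone_from(&mut self, source: &Self)` can reuse the destination's existing allocation instead of dropping it and allocating a fresh clone. A minimal sketch, assuming a `String` field and a hypothetical CLI override like `cli_options.log_dir`:

    let mut dir = String::from("/var/log/greptimedb");   // hypothetical current value
    let log_dir = Some(String::from("/data/logs"));      // hypothetical CLI override
    if let Some(d) = &log_dir {
        // Same observable result as `dir = d.clone()`, but `dir`'s buffer may be reused.
        dir.clone_from(d);
    }
    assert_eq!(dir, "/data/logs");
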
|
||||
|
||||
@@ -107,14 +107,11 @@ impl TableMetadataBencher {
|
||||
.unwrap();
|
||||
let start = Instant::now();
|
||||
let table_info = table_info.unwrap();
|
||||
let table_route = table_route.unwrap();
|
||||
let table_id = table_info.table_info.ident.table_id;
|
||||
let _ = self
|
||||
.table_metadata_manager
|
||||
.delete_table_metadata(
|
||||
table_id,
|
||||
&table_info.table_name(),
|
||||
table_route.unwrap().region_routes().unwrap(),
|
||||
)
|
||||
.delete_table_metadata(table_id, &table_info.table_name(), &table_route)
|
||||
.await;
|
||||
start.elapsed()
|
||||
},
|
||||
@@ -140,7 +137,7 @@ impl TableMetadataBencher {
|
||||
let start = Instant::now();
|
||||
let _ = self
|
||||
.table_metadata_manager
|
||||
.rename_table(table_info.unwrap(), new_table_name)
|
||||
.rename_table(&table_info.unwrap(), new_table_name)
|
||||
.await;
|
||||
|
||||
start.elapsed()
|
||||
|
||||
@@ -226,7 +226,10 @@ impl Export {
|
||||
}
|
||||
|
||||
async fn show_create_table(&self, catalog: &str, schema: &str, table: &str) -> Result<String> {
|
||||
let sql = format!("show create table {}.{}.{}", catalog, schema, table);
|
||||
let sql = format!(
|
||||
r#"show create table "{}"."{}"."{}""#,
|
||||
catalog, schema, table
|
||||
);
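// Illustrative only (values assumed): with catalog "greptime",
// schema "cli.export.create_table" and table "a.b.c", the quoted form becomes
//   show create table "greptime"."cli.export.create_table"."a.b.c"
// so identifiers that contain dots no longer break the statement.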
|
||||
let mut client = self.client.clone();
|
||||
client.set_catalog(catalog);
|
||||
client.set_schema(schema);
|
||||
@@ -273,7 +276,7 @@ impl Export {
|
||||
for (c, s, t) in table_list {
|
||||
match self.show_create_table(&c, &s, &t).await {
|
||||
Err(e) => {
|
||||
error!(e; "Failed to export table {}.{}.{}", c, s, t)
|
||||
error!(e; r#"Failed to export table "{}"."{}"."{}""#, c, s, t)
|
||||
}
|
||||
Ok(create_table) => {
|
||||
file.write_all(create_table.as_bytes())
|
||||
@@ -417,3 +420,82 @@ fn split_database(database: &str) -> Result<(String, Option<String>)> {
|
||||
Ok((catalog.to_string(), Some(schema.to_string())))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use clap::Parser;
|
||||
use client::{Client, Database};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::options::{CliOptions, Options};
|
||||
use crate::{cli, standalone, App};
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_export_create_table_with_quoted_names() -> Result<()> {
|
||||
let output_dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let standalone = standalone::Command::parse_from([
|
||||
"standalone",
|
||||
"start",
|
||||
"--data-home",
|
||||
&*output_dir.path().to_string_lossy(),
|
||||
]);
|
||||
let Options::Standalone(standalone_opts) =
|
||||
standalone.load_options(&CliOptions::default())?
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
let mut instance = standalone.build(*standalone_opts).await?;
|
||||
instance.start().await?;
|
||||
|
||||
let client = Client::with_urls(["127.0.0.1:4001"]);
|
||||
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
|
||||
database
|
||||
.sql(r#"CREATE DATABASE "cli.export.create_table";"#)
|
||||
.await
|
||||
.unwrap();
|
||||
database
|
||||
.sql(
|
||||
r#"CREATE TABLE "cli.export.create_table"."a.b.c"(
|
||||
ts TIMESTAMP,
|
||||
TIME INDEX (ts)
|
||||
) engine=mito;
|
||||
"#,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let output_dir = tempfile::tempdir().unwrap();
|
||||
let cli = cli::Command::parse_from([
|
||||
"cli",
|
||||
"export",
|
||||
"--addr",
|
||||
"127.0.0.1:4001",
|
||||
"--output-dir",
|
||||
&*output_dir.path().to_string_lossy(),
|
||||
"--target",
|
||||
"create-table",
|
||||
]);
|
||||
let mut cli_app = cli.build().await?;
|
||||
cli_app.start().await?;
|
||||
|
||||
instance.stop().await?;
|
||||
|
||||
let output_file = output_dir
|
||||
.path()
|
||||
.join("greptime-cli.export.create_table.sql");
|
||||
let res = std::fs::read_to_string(output_file).unwrap();
|
||||
let expect = r#"CREATE TABLE IF NOT EXISTS "a.b.c" (
|
||||
"ts" TIMESTAMP(3) NOT NULL,
|
||||
TIME INDEX ("ts")
|
||||
)
|
||||
|
||||
ENGINE=mito
|
||||
;
|
||||
"#;
|
||||
assert_eq!(res.trim(), expect.trim());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -192,10 +192,10 @@ impl MigrateTableMetadata {
|
||||
let key = v1SchemaKey::parse(key_str)
|
||||
.unwrap_or_else(|e| panic!("schema key is corrupted: {e}, key: {key_str}"));
|
||||
|
||||
Ok((key, ()))
|
||||
Ok(key)
|
||||
}),
|
||||
);
|
||||
while let Some((key, _)) = stream.try_next().await.context(error::IterStreamSnafu)? {
|
||||
while let Some(key) = stream.try_next().await.context(error::IterStreamSnafu)? {
|
||||
let _ = self.migrate_schema_key(&key).await;
|
||||
keys.push(key.to_string().as_bytes().to_vec());
|
||||
}
|
||||
@@ -244,10 +244,10 @@ impl MigrateTableMetadata {
|
||||
let key = v1CatalogKey::parse(key_str)
|
||||
.unwrap_or_else(|e| panic!("catalog key is corrupted: {e}, key: {key_str}"));
|
||||
|
||||
Ok((key, ()))
|
||||
Ok(key)
|
||||
}),
|
||||
);
|
||||
while let Some((key, _)) = stream.try_next().await.context(error::IterStreamSnafu)? {
|
||||
while let Some(key) = stream.try_next().await.context(error::IterStreamSnafu)? {
|
||||
let _ = self.migrate_catalog_key(&key).await;
|
||||
keys.push(key.to_string().as_bytes().to_vec());
|
||||
}
|
||||
|
||||
@@ -139,19 +139,19 @@ impl StartCommand {
|
||||
)?;
|
||||
|
||||
if let Some(dir) = &cli_options.log_dir {
|
||||
opts.logging.dir = dir.clone();
|
||||
opts.logging.dir.clone_from(dir);
|
||||
}
|
||||
|
||||
if cli_options.log_level.is_some() {
|
||||
opts.logging.level = cli_options.log_level.clone();
|
||||
opts.logging.level.clone_from(&cli_options.log_level);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.rpc_addr {
|
||||
opts.rpc_addr = addr.clone();
|
||||
opts.rpc_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if self.rpc_hostname.is_some() {
|
||||
opts.rpc_hostname = self.rpc_hostname.clone();
|
||||
opts.rpc_hostname.clone_from(&self.rpc_hostname);
|
||||
}
|
||||
|
||||
if let Some(node_id) = self.node_id {
|
||||
@@ -161,7 +161,8 @@ impl StartCommand {
|
||||
if let Some(metasrv_addrs) = &self.metasrv_addr {
|
||||
opts.meta_client
|
||||
.get_or_insert_with(MetaClientOptions::default)
|
||||
.metasrv_addrs = metasrv_addrs.clone();
|
||||
.metasrv_addrs
|
||||
.clone_from(metasrv_addrs);
|
||||
opts.mode = Mode::Distributed;
|
||||
}
|
||||
|
||||
@@ -173,7 +174,7 @@ impl StartCommand {
|
||||
}
|
||||
|
||||
if let Some(data_home) = &self.data_home {
|
||||
opts.storage.data_home = data_home.clone();
|
||||
opts.storage.data_home.clone_from(data_home);
|
||||
}
|
||||
|
||||
// `wal_dir` only affects raft-engine config.
|
||||
@@ -191,7 +192,7 @@ impl StartCommand {
|
||||
}
|
||||
|
||||
if let Some(http_addr) = &self.http_addr {
|
||||
opts.http.addr = http_addr.clone();
|
||||
opts.http.addr.clone_from(http_addr);
|
||||
}
|
||||
|
||||
if let Some(http_timeout) = self.http_timeout {
|
||||
|
||||
@@ -157,11 +157,11 @@ impl StartCommand {
|
||||
)?;
|
||||
|
||||
if let Some(dir) = &cli_options.log_dir {
|
||||
opts.logging.dir = dir.clone();
|
||||
opts.logging.dir.clone_from(dir);
|
||||
}
|
||||
|
||||
if cli_options.log_level.is_some() {
|
||||
opts.logging.level = cli_options.log_level.clone();
|
||||
opts.logging.level.clone_from(&cli_options.log_level);
|
||||
}
|
||||
|
||||
let tls_opts = TlsOption::new(
|
||||
@@ -171,7 +171,7 @@ impl StartCommand {
|
||||
);
|
||||
|
||||
if let Some(addr) = &self.http_addr {
|
||||
opts.http.addr = addr.clone()
|
||||
opts.http.addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(http_timeout) = self.http_timeout {
|
||||
@@ -183,24 +183,24 @@ impl StartCommand {
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.rpc_addr {
|
||||
opts.grpc.addr = addr.clone()
|
||||
opts.grpc.addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.mysql_addr {
|
||||
opts.mysql.enable = true;
|
||||
opts.mysql.addr = addr.clone();
|
||||
opts.mysql.addr.clone_from(addr);
|
||||
opts.mysql.tls = tls_opts.clone();
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.postgres_addr {
|
||||
opts.postgres.enable = true;
|
||||
opts.postgres.addr = addr.clone();
|
||||
opts.postgres.addr.clone_from(addr);
|
||||
opts.postgres.tls = tls_opts;
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.opentsdb_addr {
|
||||
opts.opentsdb.enable = true;
|
||||
opts.opentsdb.addr = addr.clone();
|
||||
opts.opentsdb.addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(enable) = self.influxdb_enable {
|
||||
@@ -210,11 +210,12 @@ impl StartCommand {
|
||||
if let Some(metasrv_addrs) = &self.metasrv_addr {
|
||||
opts.meta_client
|
||||
.get_or_insert_with(MetaClientOptions::default)
|
||||
.metasrv_addrs = metasrv_addrs.clone();
|
||||
.metasrv_addrs
|
||||
.clone_from(metasrv_addrs);
|
||||
opts.mode = Mode::Distributed;
|
||||
}
|
||||
|
||||
opts.user_provider = self.user_provider.clone();
|
||||
opts.user_provider.clone_from(&self.user_provider);
|
||||
|
||||
Ok(Options::Frontend(Box::new(opts)))
|
||||
}
|
||||
|
||||
@@ -64,26 +64,23 @@ pub async fn start_app(mut app: Box<dyn App>) -> error::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn log_versions() {
|
||||
/// Log the version of the application and the arguments passed to the cli.
/// `version_string` should be the same as the output of the cli "--version";
/// `app_version` is the short version of the code, usually consisting of the git branch and commit.
|
||||
pub fn log_versions(version_string: &str, app_version: &str) {
|
||||
// Report app version as gauge.
|
||||
APP_VERSION
|
||||
.with_label_values(&[short_version(), full_version()])
|
||||
.with_label_values(&[env!("CARGO_PKG_VERSION"), app_version])
|
||||
.inc();
|
||||
|
||||
// Log version and argument flags.
|
||||
info!(
|
||||
"short_version: {}, full_version: {}",
|
||||
short_version(),
|
||||
full_version()
|
||||
);
|
||||
info!("GreptimeDB version: {}", version_string);
|
||||
|
||||
log_env_flags();
|
||||
}
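
A minimal sketch of how the new signature is wired up, following the `main` and `greptimedb_cli` changes earlier in this diff (names taken from those hunks):

    let version = version!();                    // long, multi-line version string
    let cli = greptimedb_cli().version(version); // reused for clap's --version output
    // ... parse the subcommand and set up logging ...
    log_versions(version, short_version!());
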
|
||||
|
||||
pub fn greptimedb_cli() -> clap::Command {
|
||||
let cmd = clap::Command::new("greptimedb")
|
||||
.version(print_version())
|
||||
.subcommand_required(true);
|
||||
let cmd = clap::Command::new("greptimedb").subcommand_required(true);
|
||||
|
||||
#[cfg(feature = "tokio-console")]
|
||||
let cmd = cmd.arg(arg!(--"tokio-console-addr"[TOKIO_CONSOLE_ADDR]));
|
||||
@@ -91,35 +88,6 @@ pub fn greptimedb_cli() -> clap::Command {
|
||||
cmd.args([arg!(--"log-dir"[LOG_DIR]), arg!(--"log-level"[LOG_LEVEL])])
|
||||
}
|
||||
|
||||
fn print_version() -> &'static str {
|
||||
concat!(
|
||||
"\nbranch: ",
|
||||
env!("GIT_BRANCH"),
|
||||
"\ncommit: ",
|
||||
env!("GIT_COMMIT"),
|
||||
"\ndirty: ",
|
||||
env!("GIT_DIRTY"),
|
||||
"\nversion: ",
|
||||
env!("CARGO_PKG_VERSION")
|
||||
)
|
||||
}
|
||||
|
||||
fn short_version() -> &'static str {
|
||||
env!("CARGO_PKG_VERSION")
|
||||
}
|
||||
|
||||
// {app_name}-{branch_name}-{commit_short}
|
||||
// The branch name (tag) of a release build should already contain the short
|
||||
// version so the full version doesn't concat the short version explicitly.
|
||||
fn full_version() -> &'static str {
|
||||
concat!(
|
||||
"greptimedb-",
|
||||
env!("GIT_BRANCH"),
|
||||
"-",
|
||||
env!("GIT_COMMIT_SHORT")
|
||||
)
|
||||
}
|
||||
|
||||
fn log_env_flags() {
|
||||
info!("command line arguments");
|
||||
for argument in std::env::args() {
|
||||
|
||||
@@ -17,8 +17,8 @@ use std::time::Duration;
|
||||
use async_trait::async_trait;
|
||||
use clap::Parser;
|
||||
use common_telemetry::logging;
|
||||
use meta_srv::bootstrap::MetaSrvInstance;
|
||||
use meta_srv::metasrv::MetaSrvOptions;
|
||||
use meta_srv::bootstrap::MetasrvInstance;
|
||||
use meta_srv::metasrv::MetasrvOptions;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result, StartMetaServerSnafu};
|
||||
@@ -26,11 +26,11 @@ use crate::options::{CliOptions, Options};
|
||||
use crate::App;
|
||||
|
||||
pub struct Instance {
|
||||
instance: MetaSrvInstance,
|
||||
instance: MetasrvInstance,
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
fn new(instance: MetaSrvInstance) -> Self {
|
||||
fn new(instance: MetasrvInstance) -> Self {
|
||||
Self { instance }
|
||||
}
|
||||
}
|
||||
@@ -42,7 +42,7 @@ impl App for Instance {
|
||||
}
|
||||
|
||||
async fn start(&mut self) -> Result<()> {
|
||||
plugins::start_meta_srv_plugins(self.instance.plugins())
|
||||
plugins::start_metasrv_plugins(self.instance.plugins())
|
||||
.await
|
||||
.context(StartMetaServerSnafu)?;
|
||||
|
||||
@@ -64,7 +64,7 @@ pub struct Command {
|
||||
}
|
||||
|
||||
impl Command {
|
||||
pub async fn build(self, opts: MetaSrvOptions) -> Result<Instance> {
|
||||
pub async fn build(self, opts: MetasrvOptions) -> Result<Instance> {
|
||||
self.subcmd.build(opts).await
|
||||
}
|
||||
|
||||
@@ -79,7 +79,7 @@ enum SubCommand {
|
||||
}
|
||||
|
||||
impl SubCommand {
|
||||
async fn build(self, opts: MetaSrvOptions) -> Result<Instance> {
|
||||
async fn build(self, opts: MetasrvOptions) -> Result<Instance> {
|
||||
match self {
|
||||
SubCommand::Start(cmd) => cmd.build(opts).await,
|
||||
}
|
||||
@@ -127,30 +127,30 @@ struct StartCommand {
|
||||
|
||||
impl StartCommand {
|
||||
fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
|
||||
let mut opts: MetaSrvOptions = Options::load_layered_options(
|
||||
let mut opts: MetasrvOptions = Options::load_layered_options(
|
||||
self.config_file.as_deref(),
|
||||
self.env_prefix.as_ref(),
|
||||
MetaSrvOptions::env_list_keys(),
|
||||
MetasrvOptions::env_list_keys(),
|
||||
)?;
|
||||
|
||||
if let Some(dir) = &cli_options.log_dir {
|
||||
opts.logging.dir = dir.clone();
|
||||
opts.logging.dir.clone_from(dir);
|
||||
}
|
||||
|
||||
if cli_options.log_level.is_some() {
|
||||
opts.logging.level = cli_options.log_level.clone();
|
||||
opts.logging.level.clone_from(&cli_options.log_level);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.bind_addr {
|
||||
opts.bind_addr = addr.clone();
|
||||
opts.bind_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.server_addr {
|
||||
opts.server_addr = addr.clone();
|
||||
opts.server_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.store_addr {
|
||||
opts.store_addr = addr.clone();
|
||||
opts.store_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(selector_type) = &self.selector {
|
||||
@@ -168,7 +168,7 @@ impl StartCommand {
|
||||
}
|
||||
|
||||
if let Some(http_addr) = &self.http_addr {
|
||||
opts.http.addr = http_addr.clone();
|
||||
opts.http.addr.clone_from(http_addr);
|
||||
}
|
||||
|
||||
if let Some(http_timeout) = self.http_timeout {
|
||||
@@ -176,11 +176,11 @@ impl StartCommand {
|
||||
}
|
||||
|
||||
if let Some(data_home) = &self.data_home {
|
||||
opts.data_home = data_home.clone();
|
||||
opts.data_home.clone_from(data_home);
|
||||
}
|
||||
|
||||
if !self.store_key_prefix.is_empty() {
|
||||
opts.store_key_prefix = self.store_key_prefix.clone()
|
||||
opts.store_key_prefix.clone_from(&self.store_key_prefix)
|
||||
}
|
||||
|
||||
if let Some(max_txn_ops) = self.max_txn_ops {
|
||||
@@ -193,20 +193,20 @@ impl StartCommand {
|
||||
Ok(Options::Metasrv(Box::new(opts)))
|
||||
}
|
||||
|
||||
async fn build(self, mut opts: MetaSrvOptions) -> Result<Instance> {
|
||||
let plugins = plugins::setup_meta_srv_plugins(&mut opts)
|
||||
async fn build(self, mut opts: MetasrvOptions) -> Result<Instance> {
|
||||
let plugins = plugins::setup_metasrv_plugins(&mut opts)
|
||||
.await
|
||||
.context(StartMetaServerSnafu)?;
|
||||
|
||||
logging::info!("MetaSrv start command: {:#?}", self);
|
||||
logging::info!("MetaSrv options: {:#?}", opts);
|
||||
logging::info!("Metasrv start command: {:#?}", self);
|
||||
logging::info!("Metasrv options: {:#?}", opts);
|
||||
|
||||
let builder = meta_srv::bootstrap::metasrv_builder(&opts, plugins.clone(), None)
|
||||
.await
|
||||
.context(error::BuildMetaServerSnafu)?;
|
||||
let metasrv = builder.build().await.context(error::BuildMetaServerSnafu)?;
|
||||
|
||||
let instance = MetaSrvInstance::new(opts, plugins, metasrv)
|
||||
let instance = MetasrvInstance::new(opts, plugins, metasrv)
|
||||
.await
|
||||
.context(error::BuildMetaServerSnafu)?;
|
||||
|
||||
|
||||
@@ -15,12 +15,12 @@
|
||||
use clap::ArgMatches;
|
||||
use common_config::KvBackendConfig;
|
||||
use common_telemetry::logging::{LoggingOptions, TracingOptions};
|
||||
use common_wal::config::MetaSrvWalConfig;
|
||||
use common_wal::config::MetasrvWalConfig;
|
||||
use config::{Config, Environment, File, FileFormat};
|
||||
use datanode::config::{DatanodeOptions, ProcedureConfig};
|
||||
use frontend::error::{Result as FeResult, TomlFormatSnafu};
|
||||
use frontend::frontend::{FrontendOptions, TomlSerializable};
|
||||
use meta_srv::metasrv::MetaSrvOptions;
|
||||
use meta_srv::metasrv::MetasrvOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -38,7 +38,7 @@ pub struct MixOptions {
|
||||
pub frontend: FrontendOptions,
|
||||
pub datanode: DatanodeOptions,
|
||||
pub logging: LoggingOptions,
|
||||
pub wal_meta: MetaSrvWalConfig,
|
||||
pub wal_meta: MetasrvWalConfig,
|
||||
}
|
||||
|
||||
impl From<MixOptions> for FrontendOptions {
|
||||
@@ -56,7 +56,7 @@ impl TomlSerializable for MixOptions {
|
||||
pub enum Options {
|
||||
Datanode(Box<DatanodeOptions>),
|
||||
Frontend(Box<FrontendOptions>),
|
||||
Metasrv(Box<MetaSrvOptions>),
|
||||
Metasrv(Box<MetasrvOptions>),
|
||||
Standalone(Box<MixOptions>),
|
||||
Cli(Box<LoggingOptions>),
|
||||
}
|
||||
|
||||
@@ -293,11 +293,11 @@ impl StartCommand {
|
||||
opts.mode = Mode::Standalone;
|
||||
|
||||
if let Some(dir) = &cli_options.log_dir {
|
||||
opts.logging.dir = dir.clone();
|
||||
opts.logging.dir.clone_from(dir);
|
||||
}
|
||||
|
||||
if cli_options.log_level.is_some() {
|
||||
opts.logging.level = cli_options.log_level.clone();
|
||||
opts.logging.level.clone_from(&cli_options.log_level);
|
||||
}
|
||||
|
||||
let tls_opts = TlsOption::new(
|
||||
@@ -307,11 +307,11 @@ impl StartCommand {
|
||||
);
|
||||
|
||||
if let Some(addr) = &self.http_addr {
|
||||
opts.http.addr = addr.clone()
|
||||
opts.http.addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(data_home) = &self.data_home {
|
||||
opts.storage.data_home = data_home.clone();
|
||||
opts.storage.data_home.clone_from(data_home);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.rpc_addr {
|
||||
@@ -325,31 +325,31 @@ impl StartCommand {
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
opts.grpc.addr = addr.clone()
|
||||
opts.grpc.addr.clone_from(addr)
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.mysql_addr {
|
||||
opts.mysql.enable = true;
|
||||
opts.mysql.addr = addr.clone();
|
||||
opts.mysql.addr.clone_from(addr);
|
||||
opts.mysql.tls = tls_opts.clone();
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.postgres_addr {
|
||||
opts.postgres.enable = true;
|
||||
opts.postgres.addr = addr.clone();
|
||||
opts.postgres.addr.clone_from(addr);
|
||||
opts.postgres.tls = tls_opts;
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.opentsdb_addr {
|
||||
opts.opentsdb.enable = true;
|
||||
opts.opentsdb.addr = addr.clone();
|
||||
opts.opentsdb.addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if self.influxdb_enable {
|
||||
opts.influxdb.enable = self.influxdb_enable;
|
||||
}
|
||||
|
||||
opts.user_provider = self.user_provider.clone();
|
||||
opts.user_provider.clone_from(&self.user_provider);
|
||||
|
||||
let metadata_store = opts.metadata_store.clone();
|
||||
let procedure = opts.procedure.clone();
|
||||
|
||||
@@ -86,6 +86,8 @@ pub const INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID: u32 = 27;
|
||||
pub const INFORMATION_SCHEMA_PARTITIONS_TABLE_ID: u32 = 28;
|
||||
/// id for information_schema.REGION_PEERS
|
||||
pub const INFORMATION_SCHEMA_REGION_PEERS_TABLE_ID: u32 = 29;
|
||||
/// id for information_schema.TABLE_CONSTRAINTS
|
||||
pub const INFORMATION_SCHEMA_TABLE_CONSTRAINTS_TABLE_ID: u32 = 30;
|
||||
/// ----- End of information_schema tables -----
|
||||
|
||||
pub const MITO_ENGINE: &str = "mito";
|
||||
|
||||
@@ -30,7 +30,7 @@ derive_builder.workspace = true
|
||||
futures.workspace = true
|
||||
lazy_static.workspace = true
|
||||
object-store.workspace = true
|
||||
orc-rust = "0.2"
|
||||
orc-rust = { git = "https://github.com/MichaelScofield/orc-rs.git", rev = "17347f5f084ac937863317df882218055c4ea8c1" }
|
||||
parquet.workspace = true
|
||||
paste = "1.0"
|
||||
regex = "1.7"
|
||||
|
||||
@@ -60,12 +60,6 @@ impl<
|
||||
.context(error::BufferedWriterClosedSnafu)?;
|
||||
let metadata = encoder.close().await?;
|
||||
|
||||
// Use `rows_written` to keep a track of if any rows have been written.
|
||||
// If no row's been written, then we can simply close the underlying
|
||||
// writer without flush so that no file will be actually created.
|
||||
if self.rows_written != 0 {
|
||||
self.bytes_written += self.try_flush(true).await?;
|
||||
}
|
||||
// It's important to shut down! Shutting down flushes all pending writes.
|
||||
self.close_inner_writer().await?;
|
||||
Ok((metadata, self.bytes_written))
|
||||
@@ -79,8 +73,15 @@ impl<
|
||||
Fut: Future<Output = Result<T>>,
|
||||
> LazyBufferedWriter<T, U, F>
|
||||
{
|
||||
/// Closes the writer without flushing the buffer data.
|
||||
/// Closes the writer and flushes the buffer data.
|
||||
pub async fn close_inner_writer(&mut self) -> Result<()> {
|
||||
// Use `rows_written` to keep track of whether any rows have been written.
// If no rows have been written, we can simply close the underlying
// writer without flushing, so that no file is actually created.
|
||||
if self.rows_written != 0 {
|
||||
self.bytes_written += self.try_flush(true).await?;
|
||||
}
|
||||
|
||||
if let Some(writer) = &mut self.writer {
|
||||
writer.shutdown().await.context(error::AsyncWriteSnafu)?;
|
||||
}
|
||||
@@ -117,7 +118,7 @@ impl<
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn try_flush(&mut self, all: bool) -> Result<u64> {
|
||||
async fn try_flush(&mut self, all: bool) -> Result<u64> {
|
||||
let mut bytes_written: u64 = 0;
|
||||
|
||||
// Once the buffered data size reaches the threshold, split the data into chunks (typically 4MB)
|
||||
|
||||
@@ -213,10 +213,6 @@ pub async fn stream_to_file<T: DfRecordBatchEncoder, U: Fn(SharedBuffer) -> T>(
|
||||
writer.write(&batch).await?;
|
||||
rows += batch.num_rows();
|
||||
}
|
||||
|
||||
// Flushes all pending writes
|
||||
let _ = writer.try_flush(true).await?;
|
||||
writer.close_inner_writer().await?;
|
||||
|
||||
Ok(rows)
|
||||
}
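
With this refactor the final flush lives inside `close_inner_writer`, so callers such as `stream_to_file` above no longer need an explicit `try_flush(true)`. A sketch of the intended calling pattern (writer construction omitted, names as in the hunks above):

    for batch in batches {
        writer.write(&batch).await?; // buffers rows; flushes full chunks internally
    }
    // Flushes any remaining buffered rows (skipped when nothing was written, so no
    // empty file is created) and then shuts the underlying writer down.
    writer.close_inner_writer().await?;
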
|
||||
|
||||
@@ -117,7 +117,7 @@ impl CsvConfig {
|
||||
let mut builder = csv::ReaderBuilder::new(self.file_schema.clone())
|
||||
.with_delimiter(self.delimiter)
|
||||
.with_batch_size(self.batch_size)
|
||||
.has_header(self.has_header);
|
||||
.with_header(self.has_header);
|
||||
|
||||
if let Some(proj) = &self.file_projection {
|
||||
builder = builder.with_projection(proj.clone());
|
||||
|
||||
@@ -215,10 +215,7 @@ impl BufferedWriter {
|
||||
|
||||
/// Write a record batch to stream writer.
|
||||
pub async fn write(&mut self, arrow_batch: &RecordBatch) -> error::Result<()> {
|
||||
self.inner.write(arrow_batch).await?;
|
||||
self.inner.try_flush(false).await?;
|
||||
|
||||
Ok(())
|
||||
self.inner.write(arrow_batch).await
|
||||
}
|
||||
|
||||
/// Close parquet writer.
|
||||
|
||||
@@ -19,6 +19,7 @@ use std::vec;
|
||||
|
||||
use common_test_util::find_workspace_path;
|
||||
use datafusion::assert_batches_eq;
|
||||
use datafusion::config::TableParquetOptions;
|
||||
use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStream, ParquetExec};
|
||||
use datafusion::execution::context::TaskContext;
|
||||
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
|
||||
@@ -166,7 +167,7 @@ async fn test_parquet_exec() {
|
||||
.to_string();
|
||||
let base_config = scan_config(schema.clone(), None, path);
|
||||
|
||||
let exec = ParquetExec::new(base_config, None, None)
|
||||
let exec = ParquetExec::new(base_config, None, None, TableParquetOptions::default())
|
||||
.with_parquet_file_reader_factory(Arc::new(DefaultParquetFileReaderFactory::new(store)));
|
||||
|
||||
let ctx = SessionContext::new();
|
||||
|
||||
@@ -16,6 +16,7 @@ use std::sync::Arc;
|
||||
|
||||
use arrow_schema::{DataType, Field, Schema, SchemaRef};
|
||||
use common_test_util::temp_dir::{create_temp_dir, TempDir};
|
||||
use datafusion::common::Statistics;
|
||||
use datafusion::datasource::listing::PartitionedFile;
|
||||
use datafusion::datasource::object_store::ObjectStoreUrl;
|
||||
use datafusion::datasource::physical_plan::{FileScanConfig, FileStream};
|
||||
@@ -72,17 +73,16 @@ pub fn test_basic_schema() -> SchemaRef {
|
||||
pub fn scan_config(file_schema: SchemaRef, limit: Option<usize>, filename: &str) -> FileScanConfig {
|
||||
// object_store only recognizes Unix-style paths, so make it happy.
|
||||
let filename = &filename.replace('\\', "/");
|
||||
|
||||
let statistics = Statistics::new_unknown(file_schema.as_ref());
|
||||
FileScanConfig {
|
||||
object_store_url: ObjectStoreUrl::parse("empty://").unwrap(), // won't be used
|
||||
file_schema,
|
||||
file_groups: vec![vec![PartitionedFile::new(filename.to_string(), 10)]],
|
||||
statistics: Default::default(),
|
||||
statistics,
|
||||
projection: None,
|
||||
limit,
|
||||
table_partition_cols: vec![],
|
||||
output_ordering: vec![],
|
||||
infinite_source: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -59,6 +59,7 @@ pub enum StatusCode {
|
||||
RegionNotFound = 4005,
|
||||
RegionAlreadyExists = 4006,
|
||||
RegionReadonly = 4007,
|
||||
/// Region is not in a proper state to handle the specific request.
|
||||
RegionNotReady = 4008,
|
||||
// If mutually exclusive operations arrive at the same time,
// only one can be executed; the other one will get "region busy".
|
||||
|
||||
@@ -56,7 +56,7 @@ where
|
||||
.map(|&n| n.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
nums,
|
||||
I::LogicalType::build_data_type(),
|
||||
))])
|
||||
}
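
The `ListValue::new` call sites in this and the following hunks all change the same way: the constructor now takes the `Vec<Value>` directly instead of an `Option<Box<Vec<Value>>>`. A minimal sketch with assumed values:

    let values = vec![Value::from(1_i64), Value::from(2_i64)];
    let list = Value::List(ListValue::new(values, ConcreteDataType::int64_datatype()));
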
|
||||
@@ -120,10 +120,7 @@ where
|
||||
O::from_native(native).into()
|
||||
})
|
||||
.collect::<Vec<Value>>();
|
||||
let diff = Value::List(ListValue::new(
|
||||
Some(Box::new(diff)),
|
||||
O::LogicalType::build_data_type(),
|
||||
));
|
||||
let diff = Value::List(ListValue::new(diff, O::LogicalType::build_data_type()));
|
||||
Ok(diff)
|
||||
}
|
||||
}
|
||||
@@ -218,10 +215,7 @@ mod test {
|
||||
let values = vec![Value::from(2_i64), Value::from(1_i64)];
|
||||
diff.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(values)),
|
||||
ConcreteDataType::int64_datatype()
|
||||
)),
|
||||
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
|
||||
diff.evaluate().unwrap()
|
||||
);
|
||||
|
||||
@@ -236,10 +230,7 @@ mod test {
|
||||
let values = vec![Value::from(5_i64), Value::from(1_i64)];
|
||||
diff.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(values)),
|
||||
ConcreteDataType::int64_datatype()
|
||||
)),
|
||||
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
|
||||
diff.evaluate().unwrap()
|
||||
);
|
||||
|
||||
@@ -252,10 +243,7 @@ mod test {
|
||||
let values = vec![Value::from(0_i64), Value::from(0_i64), Value::from(0_i64)];
|
||||
diff.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(values)),
|
||||
ConcreteDataType::int64_datatype()
|
||||
)),
|
||||
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
|
||||
diff.evaluate().unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -104,10 +104,7 @@ where
|
||||
.map(|&n| n.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::LogicalType::build_data_type(),
|
||||
)),
|
||||
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
|
||||
self.p.into(),
|
||||
])
|
||||
}
|
||||
|
||||
@@ -72,10 +72,7 @@ where
|
||||
.map(|&n| n.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::LogicalType::build_data_type(),
|
||||
)),
|
||||
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
|
||||
self.x.into(),
|
||||
])
|
||||
}
|
||||
|
||||
@@ -56,10 +56,7 @@ where
|
||||
.map(|&x| x.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::LogicalType::build_data_type(),
|
||||
)),
|
||||
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
|
||||
self.x.into(),
|
||||
])
|
||||
}
|
||||
|
||||
@@ -56,10 +56,7 @@ where
|
||||
.map(|&x| x.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::LogicalType::build_data_type(),
|
||||
)),
|
||||
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
|
||||
self.x.into(),
|
||||
])
|
||||
}
|
||||
|
||||
@@ -77,7 +77,7 @@ impl Function for RangeFunction {
|
||||
/// `range_fn` will never be used. As long as a legal signature is returned, the specific content of the signature does not matter.
/// In fact, the arguments loaded by `range_fn` are very complicated, and it is difficult to describe them with a `Signature`.
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::any(0, Volatility::Immutable)
|
||||
Signature::variadic_any(Volatility::Immutable)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
pub mod channel_manager;
|
||||
pub mod error;
|
||||
pub mod flight;
|
||||
pub mod precision;
|
||||
pub mod select;
|
||||
pub mod writer;
|
||||
|
||||
pub use error::Error;
|
||||
|
||||
141	src/common/grpc/src/precision.rs	(new file)
@@ -0,0 +1,141 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use common_time::timestamp::TimeUnit;
|
||||
|
||||
use crate::Error;
|
||||
|
||||
/// Precision represents the precision of a timestamp.
|
||||
/// It is used to convert timestamps between different precisions.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Precision {
|
||||
Nanosecond,
|
||||
Microsecond,
|
||||
Millisecond,
|
||||
Second,
|
||||
Minute,
|
||||
Hour,
|
||||
}
|
||||
|
||||
impl Precision {
|
||||
pub fn to_nanos(&self, amount: i64) -> Option<i64> {
|
||||
match self {
|
||||
Precision::Nanosecond => Some(amount),
|
||||
Precision::Microsecond => amount.checked_mul(1_000),
|
||||
Precision::Millisecond => amount.checked_mul(1_000_000),
|
||||
Precision::Second => amount.checked_mul(1_000_000_000),
|
||||
Precision::Minute => amount
|
||||
.checked_mul(60)
|
||||
.and_then(|a| a.checked_mul(1_000_000_000)),
|
||||
Precision::Hour => amount
|
||||
.checked_mul(3600)
|
||||
.and_then(|a| a.checked_mul(1_000_000_000)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_millis(&self, amount: i64) -> Option<i64> {
|
||||
match self {
|
||||
Precision::Nanosecond => amount.checked_div(1_000_000),
|
||||
Precision::Microsecond => amount.checked_div(1_000),
|
||||
Precision::Millisecond => Some(amount),
|
||||
Precision::Second => amount.checked_mul(1_000),
|
||||
Precision::Minute => amount.checked_mul(60_000),
|
||||
Precision::Hour => amount.checked_mul(3_600_000),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Precision {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Precision::Nanosecond => write!(f, "Precision::Nanosecond"),
|
||||
Precision::Microsecond => write!(f, "Precision::Microsecond"),
|
||||
Precision::Millisecond => write!(f, "Precision::Millisecond"),
|
||||
Precision::Second => write!(f, "Precision::Second"),
|
||||
Precision::Minute => write!(f, "Precision::Minute"),
|
||||
Precision::Hour => write!(f, "Precision::Hour"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Precision> for TimeUnit {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(precision: Precision) -> Result<Self, Self::Error> {
|
||||
Ok(match precision {
|
||||
Precision::Second => TimeUnit::Second,
|
||||
Precision::Millisecond => TimeUnit::Millisecond,
|
||||
Precision::Microsecond => TimeUnit::Microsecond,
|
||||
Precision::Nanosecond => TimeUnit::Nanosecond,
|
||||
_ => {
|
||||
return Err(Error::NotSupported {
|
||||
feat: format!("convert {precision} into TimeUnit"),
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::precision::Precision;
|
||||
|
||||
#[test]
|
||||
fn test_to_nanos() {
|
||||
assert_eq!(Precision::Nanosecond.to_nanos(1).unwrap(), 1);
|
||||
assert_eq!(Precision::Microsecond.to_nanos(1).unwrap(), 1_000);
|
||||
assert_eq!(Precision::Millisecond.to_nanos(1).unwrap(), 1_000_000);
|
||||
assert_eq!(Precision::Second.to_nanos(1).unwrap(), 1_000_000_000);
|
||||
assert_eq!(Precision::Minute.to_nanos(1).unwrap(), 60 * 1_000_000_000);
|
||||
assert_eq!(
|
||||
Precision::Hour.to_nanos(1).unwrap(),
|
||||
60 * 60 * 1_000_000_000
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_millis() {
|
||||
assert_eq!(Precision::Nanosecond.to_millis(1_000_000).unwrap(), 1);
|
||||
assert_eq!(Precision::Microsecond.to_millis(1_000).unwrap(), 1);
|
||||
assert_eq!(Precision::Millisecond.to_millis(1).unwrap(), 1);
|
||||
assert_eq!(Precision::Second.to_millis(1).unwrap(), 1_000);
|
||||
assert_eq!(Precision::Minute.to_millis(1).unwrap(), 60 * 1_000);
|
||||
assert_eq!(Precision::Hour.to_millis(1).unwrap(), 60 * 60 * 1_000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_nanos_basic() {
|
||||
assert_eq!(Precision::Second.to_nanos(1), Some(1_000_000_000));
|
||||
assert_eq!(Precision::Minute.to_nanos(1), Some(60 * 1_000_000_000));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_millis_basic() {
|
||||
assert_eq!(Precision::Second.to_millis(1), Some(1_000));
|
||||
assert_eq!(Precision::Minute.to_millis(1), Some(60_000));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_nanos_overflow() {
|
||||
assert_eq!(Precision::Hour.to_nanos(i64::MAX / 100), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_zero_input() {
|
||||
assert_eq!(Precision::Second.to_nanos(0), Some(0));
|
||||
assert_eq!(Precision::Minute.to_millis(0), Some(0));
|
||||
}
|
||||
}
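
A short usage sketch of the new `Precision` helper, based on the conversions and the `TryFrom<Precision> for TimeUnit` impl above (the timestamp value is assumed):

    let precision = Precision::Microsecond;
    let ts: i64 = 1_700_000_000_000_000;               // hypothetical microsecond timestamp
    let millis = precision.to_millis(ts);              // Some(1_700_000_000_000)
    let nanos = precision.to_nanos(ts);                // Some(1_700_000_000_000_000_000)
    let unit = TimeUnit::try_from(precision).unwrap(); // TimeUnit::Microsecond
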
|
||||
@@ -1,441 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
|
||||
use api::helper::values_with_capacity;
|
||||
use api::v1::{Column, ColumnDataType, ColumnDataTypeExtension, SemanticType};
|
||||
use common_base::BitVec;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::error::{Result, TypeMismatchSnafu};
|
||||
use crate::Error;
|
||||
|
||||
type ColumnName = String;
|
||||
|
||||
type RowCount = u32;
|
||||
|
||||
// TODO(fys): will remove in the future.
|
||||
#[derive(Default)]
|
||||
pub struct LinesWriter {
|
||||
column_name_index: HashMap<ColumnName, usize>,
|
||||
null_masks: Vec<BitVec>,
|
||||
batch: (Vec<Column>, RowCount),
|
||||
lines: usize,
|
||||
}
|
||||
|
||||
impl LinesWriter {
|
||||
pub fn with_lines(lines: usize) -> Self {
|
||||
Self {
|
||||
lines,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write_ts(&mut self, column_name: &str, value: (i64, Precision)) -> Result<()> {
|
||||
let (idx, column) = self.mut_column(
|
||||
column_name,
|
||||
ColumnDataType::TimestampMillisecond,
|
||||
SemanticType::Timestamp,
|
||||
None,
|
||||
);
|
||||
ensure!(
|
||||
column.datatype == ColumnDataType::TimestampMillisecond as i32,
|
||||
TypeMismatchSnafu {
|
||||
column_name,
|
||||
expected: "timestamp",
|
||||
actual: format!("{:?}", column.datatype)
|
||||
}
|
||||
);
|
||||
// It is safe to use unwrap here, because values has been initialized in mut_column()
|
||||
let values = column.values.as_mut().unwrap();
|
||||
values
|
||||
.timestamp_millisecond_values
|
||||
.push(to_ms_ts(value.1, value.0));
|
||||
self.null_masks[idx].push(false);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_tag(&mut self, column_name: &str, value: &str) -> Result<()> {
|
||||
let (idx, column) =
|
||||
self.mut_column(column_name, ColumnDataType::String, SemanticType::Tag, None);
|
||||
ensure!(
|
||||
column.datatype == ColumnDataType::String as i32,
|
||||
TypeMismatchSnafu {
|
||||
column_name,
|
||||
expected: "string",
|
||||
actual: format!("{:?}", column.datatype)
|
||||
}
|
||||
);
|
||||
// It is safe to use unwrap here, because values has been initialized in mut_column()
|
||||
let values = column.values.as_mut().unwrap();
|
||||
values.string_values.push(value.to_string());
|
||||
self.null_masks[idx].push(false);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_u64(&mut self, column_name: &str, value: u64) -> Result<()> {
|
||||
let (idx, column) = self.mut_column(
|
||||
column_name,
|
||||
ColumnDataType::Uint64,
|
||||
SemanticType::Field,
|
||||
None,
|
||||
);
|
||||
ensure!(
|
||||
column.datatype == ColumnDataType::Uint64 as i32,
|
||||
TypeMismatchSnafu {
|
||||
column_name,
|
||||
expected: "u64",
|
||||
actual: format!("{:?}", column.datatype)
|
||||
}
|
||||
);
|
||||
// It is safe to use unwrap here, because values has been initialized in mut_column()
|
||||
let values = column.values.as_mut().unwrap();
|
||||
values.u64_values.push(value);
|
||||
self.null_masks[idx].push(false);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_i64(&mut self, column_name: &str, value: i64) -> Result<()> {
|
||||
let (idx, column) = self.mut_column(
|
||||
column_name,
|
||||
ColumnDataType::Int64,
|
||||
SemanticType::Field,
|
||||
None,
|
||||
);
|
||||
ensure!(
|
||||
column.datatype == ColumnDataType::Int64 as i32,
|
||||
TypeMismatchSnafu {
|
||||
column_name,
|
||||
expected: "i64",
|
||||
actual: format!("{:?}", column.datatype)
|
||||
}
|
||||
);
|
||||
// It is safe to use unwrap here, because values has been initialized in mut_column()
|
||||
let values = column.values.as_mut().unwrap();
|
||||
values.i64_values.push(value);
|
||||
self.null_masks[idx].push(false);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_f64(&mut self, column_name: &str, value: f64) -> Result<()> {
|
||||
let (idx, column) = self.mut_column(
|
||||
column_name,
|
||||
ColumnDataType::Float64,
|
||||
SemanticType::Field,
|
||||
None,
|
||||
);
|
||||
ensure!(
|
||||
column.datatype == ColumnDataType::Float64 as i32,
|
||||
TypeMismatchSnafu {
|
||||
column_name,
|
||||
expected: "f64",
|
||||
actual: format!("{:?}", column.datatype)
|
||||
}
|
||||
);
|
||||
// It is safe to use unwrap here, because values has been initialized in mut_column()
|
||||
let values = column.values.as_mut().unwrap();
|
||||
values.f64_values.push(value);
|
||||
self.null_masks[idx].push(false);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_string(&mut self, column_name: &str, value: &str) -> Result<()> {
|
||||
let (idx, column) = self.mut_column(
|
||||
column_name,
|
||||
ColumnDataType::String,
|
||||
SemanticType::Field,
|
||||
None,
|
||||
);
|
||||
ensure!(
|
||||
column.datatype == ColumnDataType::String as i32,
|
||||
TypeMismatchSnafu {
|
||||
column_name,
|
||||
expected: "string",
|
||||
actual: format!("{:?}", column.datatype)
|
||||
}
|
||||
);
|
||||
// It is safe to use unwrap here, because values has been initialized in mut_column()
|
||||
let values = column.values.as_mut().unwrap();
|
||||
values.string_values.push(value.to_string());
|
||||
self.null_masks[idx].push(false);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_bool(&mut self, column_name: &str, value: bool) -> Result<()> {
|
||||
let (idx, column) = self.mut_column(
|
||||
column_name,
|
||||
ColumnDataType::Boolean,
|
||||
SemanticType::Field,
|
||||
None,
|
||||
);
|
||||
ensure!(
|
||||
column.datatype == ColumnDataType::Boolean as i32,
|
||||
TypeMismatchSnafu {
|
||||
column_name,
|
||||
expected: "boolean",
|
||||
actual: format!("{:?}", column.datatype)
|
||||
}
|
||||
);
|
||||
// It is safe to use unwrap here, because values has been initialized in mut_column()
|
||||
let values = column.values.as_mut().unwrap();
|
||||
values.bool_values.push(value);
|
||||
self.null_masks[idx].push(false);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn commit(&mut self) {
|
||||
let batch = &mut self.batch;
|
||||
batch.1 += 1;
|
||||
|
||||
for i in 0..batch.0.len() {
|
||||
let null_mask = &mut self.null_masks[i];
|
||||
if batch.1 as usize > null_mask.len() {
|
||||
null_mask.push(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn finish(mut self) -> (Vec<Column>, RowCount) {
|
||||
let null_masks = self.null_masks;
|
||||
for (i, null_mask) in null_masks.into_iter().enumerate() {
|
||||
let columns = &mut self.batch.0;
|
||||
columns[i].null_mask = null_mask.into_vec();
|
||||
}
|
||||
self.batch
|
||||
}
|
||||
|
||||
fn mut_column(
|
||||
&mut self,
|
||||
column_name: &str,
|
||||
datatype: ColumnDataType,
|
||||
semantic_type: SemanticType,
|
||||
datatype_extension: Option<ColumnDataTypeExtension>,
|
||||
) -> (usize, &mut Column) {
|
||||
let column_names = &mut self.column_name_index;
|
||||
let column_idx = match column_names.get(column_name) {
|
||||
Some(i) => *i,
|
||||
None => {
|
||||
let new_idx = column_names.len();
|
||||
let batch = &mut self.batch;
|
||||
let to_insert = self.lines;
|
||||
let mut null_mask = BitVec::with_capacity(to_insert);
|
||||
null_mask.extend(BitVec::repeat(true, batch.1 as usize));
|
||||
self.null_masks.push(null_mask);
|
||||
batch.0.push(Column {
|
||||
column_name: column_name.to_string(),
|
||||
semantic_type: semantic_type.into(),
|
||||
values: Some(values_with_capacity(datatype, to_insert)),
|
||||
datatype: datatype as i32,
|
||||
null_mask: Vec::default(),
|
||||
datatype_extension,
|
||||
});
|
||||
let _ = column_names.insert(column_name.to_string(), new_idx);
|
||||
new_idx
|
||||
}
|
||||
};
|
||||
(column_idx, &mut self.batch.0[column_idx])
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ms_ts(p: Precision, ts: i64) -> i64 {
|
||||
match p {
|
||||
Precision::Nanosecond => ts / 1_000_000,
|
||||
Precision::Microsecond => ts / 1000,
|
||||
Precision::Millisecond => ts,
|
||||
Precision::Second => ts * 1000,
|
||||
Precision::Minute => ts * 1000 * 60,
|
||||
Precision::Hour => ts * 1000 * 60 * 60,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Precision {
|
||||
Nanosecond,
|
||||
Microsecond,
|
||||
Millisecond,
|
||||
Second,
|
||||
Minute,
|
||||
Hour,
|
||||
}
|
||||
|
||||
impl Display for Precision {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Precision::Nanosecond => write!(f, "Precision::Nanosecond"),
|
||||
Precision::Microsecond => write!(f, "Precision::Microsecond"),
|
||||
Precision::Millisecond => write!(f, "Precision::Millisecond"),
|
||||
Precision::Second => write!(f, "Precision::Second"),
|
||||
Precision::Minute => write!(f, "Precision::Minute"),
|
||||
Precision::Hour => write!(f, "Precision::Hour"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Precision> for TimeUnit {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(precision: Precision) -> std::result::Result<Self, Self::Error> {
|
||||
Ok(match precision {
|
||||
Precision::Second => TimeUnit::Second,
|
||||
Precision::Millisecond => TimeUnit::Millisecond,
|
||||
Precision::Microsecond => TimeUnit::Microsecond,
|
||||
Precision::Nanosecond => TimeUnit::Nanosecond,
|
||||
_ => {
|
||||
return Err(Error::NotSupported {
|
||||
feat: format!("convert {precision} into TimeUnit"),
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use api::v1::{ColumnDataType, SemanticType};
|
||||
use common_base::BitVec;
|
||||
|
||||
use super::LinesWriter;
|
||||
use crate::writer::{to_ms_ts, Precision};
|
||||
|
||||
#[test]
|
||||
fn test_lines_writer() {
|
||||
let mut writer = LinesWriter::with_lines(3);
|
||||
|
||||
writer.write_tag("host", "host1").unwrap();
|
||||
writer.write_f64("cpu", 0.5).unwrap();
|
||||
writer.write_f64("memory", 0.4).unwrap();
|
||||
writer.write_string("name", "name1").unwrap();
|
||||
writer
|
||||
.write_ts("ts", (101011000, Precision::Millisecond))
|
||||
.unwrap();
|
||||
writer.commit();
|
||||
|
||||
writer.write_tag("host", "host2").unwrap();
|
||||
writer
|
||||
.write_ts("ts", (102011001, Precision::Millisecond))
|
||||
.unwrap();
|
||||
writer.write_bool("enable_reboot", true).unwrap();
|
||||
writer.write_u64("year_of_service", 2).unwrap();
|
||||
writer.write_i64("temperature", 4).unwrap();
|
||||
writer.commit();
|
||||
|
||||
writer.write_tag("host", "host3").unwrap();
|
||||
writer.write_f64("cpu", 0.4).unwrap();
|
||||
writer.write_u64("cpu_core_num", 16).unwrap();
|
||||
writer
|
||||
.write_ts("ts", (103011002, Precision::Millisecond))
|
||||
.unwrap();
|
||||
writer.commit();
|
||||
|
||||
let insert_batch = writer.finish();
|
||||
assert_eq!(3, insert_batch.1);
|
||||
|
||||
let columns = insert_batch.0;
|
||||
assert_eq!(9, columns.len());
|
||||
|
||||
let column = &columns[0];
|
||||
assert_eq!("host", columns[0].column_name);
|
||||
assert_eq!(ColumnDataType::String as i32, column.datatype);
|
||||
assert_eq!(SemanticType::Tag as i32, column.semantic_type);
|
||||
assert_eq!(
|
||||
vec!["host1", "host2", "host3"],
|
||||
column.values.as_ref().unwrap().string_values
|
||||
);
|
||||
verify_null_mask(&column.null_mask, vec![false, false, false]);
|
||||
|
||||
let column = &columns[1];
|
||||
assert_eq!("cpu", column.column_name);
|
||||
assert_eq!(ColumnDataType::Float64 as i32, column.datatype);
|
||||
assert_eq!(SemanticType::Field as i32, column.semantic_type);
|
||||
assert_eq!(vec![0.5, 0.4], column.values.as_ref().unwrap().f64_values);
|
||||
verify_null_mask(&column.null_mask, vec![false, true, false]);
|
||||
|
||||
let column = &columns[2];
|
||||
assert_eq!("memory", column.column_name);
|
||||
assert_eq!(ColumnDataType::Float64 as i32, column.datatype);
|
||||
assert_eq!(SemanticType::Field as i32, column.semantic_type);
|
||||
assert_eq!(vec![0.4], column.values.as_ref().unwrap().f64_values);
|
||||
verify_null_mask(&column.null_mask, vec![false, true, true]);
|
||||
|
||||
let column = &columns[3];
|
||||
assert_eq!("name", column.column_name);
|
||||
assert_eq!(ColumnDataType::String as i32, column.datatype);
|
||||
assert_eq!(SemanticType::Field as i32, column.semantic_type);
|
||||
assert_eq!(vec!["name1"], column.values.as_ref().unwrap().string_values);
|
||||
verify_null_mask(&column.null_mask, vec![false, true, true]);
|
||||
|
||||
let column = &columns[4];
|
||||
assert_eq!("ts", column.column_name);
|
||||
assert_eq!(ColumnDataType::TimestampMillisecond as i32, column.datatype);
|
||||
assert_eq!(SemanticType::Timestamp as i32, column.semantic_type);
|
||||
assert_eq!(
|
||||
vec![101011000, 102011001, 103011002],
|
||||
column.values.as_ref().unwrap().timestamp_millisecond_values
|
||||
);
|
||||
verify_null_mask(&column.null_mask, vec![false, false, false]);
|
||||
|
||||
let column = &columns[5];
|
||||
assert_eq!("enable_reboot", column.column_name);
|
||||
assert_eq!(ColumnDataType::Boolean as i32, column.datatype);
|
||||
assert_eq!(SemanticType::Field as i32, column.semantic_type);
|
||||
assert_eq!(vec![true], column.values.as_ref().unwrap().bool_values);
|
||||
verify_null_mask(&column.null_mask, vec![true, false, true]);
|
||||
|
||||
let column = &columns[6];
|
||||
assert_eq!("year_of_service", column.column_name);
|
||||
assert_eq!(ColumnDataType::Uint64 as i32, column.datatype);
|
||||
assert_eq!(SemanticType::Field as i32, column.semantic_type);
|
||||
assert_eq!(vec![2], column.values.as_ref().unwrap().u64_values);
|
||||
verify_null_mask(&column.null_mask, vec![true, false, true]);
|
||||
|
||||
let column = &columns[7];
|
||||
assert_eq!("temperature", column.column_name);
|
||||
assert_eq!(ColumnDataType::Int64 as i32, column.datatype);
|
||||
assert_eq!(SemanticType::Field as i32, column.semantic_type);
|
||||
assert_eq!(vec![4], column.values.as_ref().unwrap().i64_values);
|
||||
verify_null_mask(&column.null_mask, vec![true, false, true]);
|
||||
|
||||
let column = &columns[8];
|
||||
assert_eq!("cpu_core_num", column.column_name);
|
||||
assert_eq!(ColumnDataType::Uint64 as i32, column.datatype);
|
||||
assert_eq!(SemanticType::Field as i32, column.semantic_type);
|
||||
assert_eq!(vec![16], column.values.as_ref().unwrap().u64_values);
|
||||
verify_null_mask(&column.null_mask, vec![true, true, false]);
|
||||
}
|
||||
|
||||
fn verify_null_mask(data: &[u8], expected: Vec<bool>) {
|
||||
let bitvec = BitVec::from_slice(data);
|
||||
for (idx, b) in expected.iter().enumerate() {
|
||||
assert_eq!(b, bitvec.get(idx).unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_ms() {
|
||||
assert_eq!(100, to_ms_ts(Precision::Nanosecond, 100110000));
|
||||
assert_eq!(100110, to_ms_ts(Precision::Microsecond, 100110000));
|
||||
assert_eq!(100110000, to_ms_ts(Precision::Millisecond, 100110000));
|
||||
assert_eq!(
|
||||
100110000 * 1000 * 60,
|
||||
to_ms_ts(Precision::Minute, 100110000)
|
||||
);
|
||||
assert_eq!(
|
||||
100110000 * 1000 * 60 * 60,
|
||||
to_ms_ts(Precision::Hour, 100110000)
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -119,15 +119,17 @@ fn build_struct(
|
||||
}
|
||||
|
||||
pub fn scalar_udf() -> ScalarUDF {
|
||||
ScalarUDF {
|
||||
name: Self::name().to_string(),
|
||||
signature: Signature::new(
|
||||
// TODO(LFC): Use the new Datafusion UDF impl.
|
||||
#[allow(deprecated)]
|
||||
ScalarUDF::new(
|
||||
Self::name(),
|
||||
&Signature::new(
|
||||
TypeSignature::Exact(Self::input_type()),
|
||||
Volatility::Immutable,
|
||||
),
|
||||
return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))),
|
||||
fun: Arc::new(Self::calc),
|
||||
}
|
||||
&(Arc::new(|_: &_| Ok(Arc::new(Self::return_type()))) as _),
|
||||
&(Arc::new(Self::calc) as _),
|
||||
)
|
||||
}
|
||||
|
||||
fn input_type() -> Vec<DataType> {
|
||||
|
||||
@@ -18,6 +18,7 @@ use tokio::sync::RwLock;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::instruction::CacheIdent;
|
||||
use crate::key::schema_name::SchemaNameKey;
|
||||
use crate::key::table_info::TableInfoKey;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteKey;
|
||||
@@ -107,6 +108,10 @@ where
|
||||
let key: TableNameKey = (&table_name).into();
|
||||
self.invalidate_key(&key.as_raw_key()).await
|
||||
}
|
||||
CacheIdent::SchemaName(schema_name) => {
|
||||
let key: SchemaNameKey = (&schema_name).into();
|
||||
self.invalidate_key(&key.as_raw_key()).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
|
||||
@@ -50,11 +50,13 @@ pub trait ClusterInfo {
|
||||
}
|
||||
|
||||
/// The key of [NodeInfo] in the storage. The format is `__meta_cluster_node_info-{cluster_id}-{role}-{node_id}`.
|
||||
/// This key cannot be used to describe the `Metasrv`, because the `Metasrv` does not have
/// a `cluster_id`; it serves multiple clusters.
|
||||
#[derive(Debug, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)]
|
||||
pub struct NodeInfoKey {
|
||||
/// The cluster id.
|
||||
pub cluster_id: u64,
|
||||
/// The role of the node. It can be [Role::Datanode], [Role::Frontend], or [Role::Metasrv].
|
||||
/// The role of the node. It can be `[Role::Datanode]` or `[Role::Frontend]`.
|
||||
pub role: Role,
|
||||
/// The node id.
|
||||
pub node_id: u64,
|
||||
|
||||
@@ -12,10 +12,10 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::region::{QueryRequest, RegionRequest, RegionResponse};
|
||||
use api::region::RegionResponse;
|
||||
use api::v1::region::{QueryRequest, RegionRequest};
|
||||
pub use common_base::AffectedRows;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
|
||||
@@ -26,7 +26,7 @@ use crate::peer::Peer;
|
||||
#[async_trait::async_trait]
|
||||
pub trait Datanode: Send + Sync {
|
||||
/// Handles DML and DDL requests.
|
||||
async fn handle(&self, request: RegionRequest) -> Result<HandleResponse>;
|
||||
async fn handle(&self, request: RegionRequest) -> Result<RegionResponse>;
|
||||
|
||||
/// Handles query requests
|
||||
async fn handle_query(&self, request: QueryRequest) -> Result<SendableRecordBatchStream>;
|
||||
@@ -42,27 +42,3 @@ pub trait DatanodeManager: Send + Sync {
|
||||
}
|
||||
|
||||
pub type DatanodeManagerRef = Arc<dyn DatanodeManager>;
|
||||
|
||||
/// This result struct is derived from [RegionResponse]
|
||||
#[derive(Debug)]
|
||||
pub struct HandleResponse {
|
||||
pub affected_rows: AffectedRows,
|
||||
pub extension: HashMap<String, Vec<u8>>,
|
||||
}
|
||||
|
||||
impl HandleResponse {
|
||||
pub fn from_region_response(region_response: RegionResponse) -> Self {
|
||||
Self {
|
||||
affected_rows: region_response.affected_rows as _,
|
||||
extension: region_response.extension,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates one response without extension
|
||||
pub fn new(affected_rows: AffectedRows) -> Self {
|
||||
Self {
|
||||
affected_rows,
|
||||
extension: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,6 +35,7 @@ use crate::ddl::DdlContext;
use crate::error::{DecodeJsonSnafu, Error, MetadataCorruptionSnafu, Result};
use crate::key::table_info::TableInfoValue;
use crate::key::table_route::PhysicalTableRouteValue;
use crate::key::DeserializedValueWithBytes;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::rpc::ddl::AlterTableTask;
use crate::rpc::router::find_leaders;
@@ -245,10 +246,10 @@ pub struct AlterTablesData {
tasks: Vec<AlterTableTask>,
/// Table info values before the alter operation.
/// Corresponding one-to-one with the AlterTableTask in tasks.
table_info_values: Vec<TableInfoValue>,
table_info_values: Vec<DeserializedValueWithBytes<TableInfoValue>>,
/// Physical table info
physical_table_id: TableId,
physical_table_info: Option<TableInfoValue>,
physical_table_info: Option<DeserializedValueWithBytes<TableInfoValue>>,
physical_table_route: Option<PhysicalTableRouteValue>,
physical_columns: Vec<ColumnMetadata>,
}
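Several hunks in this compare replace plain `TableInfoValue` fields with `DeserializedValueWithBytes<TableInfoValue>`. The wrapper's real definition lives in `crate::key` and is not shown in this excerpt; as a rough sketch under that caveat, it presumably pairs the decoded value with the bytes it was decoded from, so later metadata updates can be checked against the original encoding.

```rust
// Hypothetical shape of the wrapper; field and method names other than
// `into_inner` (which appears elsewhere in this compare) are assumptions.
struct DeserializedValueWithBytes<T> {
    inner: T,
    raw_bytes: Vec<u8>,
}

impl<T> DeserializedValueWithBytes<T> {
    // Consumes the wrapper and returns the decoded value.
    fn into_inner(self) -> T {
        self.inner
    }

    // Assumed accessor for the original encoding.
    fn raw_bytes(&self) -> &[u8] {
        &self.raw_bytes
    }
}

fn main() {
    let value = DeserializedValueWithBytes {
        inner: String::from("table info"),
        raw_bytes: b"table info".to_vec(),
    };
    assert_eq!(value.raw_bytes().len(), 10);
    assert_eq!(value.into_inner(), "table info");
}
```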
@@ -24,6 +24,7 @@ use crate::error::{
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::key::DeserializedValueWithBytes;
|
||||
use crate::rpc::ddl::AlterTableTask;
|
||||
|
||||
impl AlterLogicalTablesProcedure {
|
||||
@@ -61,11 +62,9 @@ impl AlterLogicalTablesProcedure {
|
||||
.get_full_table_info(self.data.physical_table_id)
|
||||
.await?;
|
||||
|
||||
let physical_table_info = physical_table_info
|
||||
.with_context(|| TableInfoNotFoundSnafu {
|
||||
table: format!("table id - {}", self.data.physical_table_id),
|
||||
})?
|
||||
.into_inner();
|
||||
let physical_table_info = physical_table_info.with_context(|| TableInfoNotFoundSnafu {
|
||||
table: format!("table id - {}", self.data.physical_table_id),
|
||||
})?;
|
||||
let physical_table_route = physical_table_route
|
||||
.context(TableRouteNotFoundSnafu {
|
||||
table_id: self.data.physical_table_id,
|
||||
@@ -99,9 +98,9 @@ impl AlterLogicalTablesProcedure {
|
||||
async fn get_all_table_info_values(
|
||||
&self,
|
||||
table_ids: &[TableId],
|
||||
) -> Result<Vec<TableInfoValue>> {
|
||||
) -> Result<Vec<DeserializedValueWithBytes<TableInfoValue>>> {
|
||||
let table_info_manager = self.context.table_metadata_manager.table_info_manager();
|
||||
let mut table_info_map = table_info_manager.batch_get(table_ids).await?;
|
||||
let mut table_info_map = table_info_manager.batch_get_raw(table_ids).await?;
|
||||
let mut table_info_values = Vec::with_capacity(table_ids.len());
|
||||
for (table_id, task) in table_ids.iter().zip(self.data.tasks.iter()) {
|
||||
let table_info_value =
|
||||
|
||||
@@ -33,6 +33,7 @@ impl AlterLogicalTablesProcedure {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Safety: must exist.
|
||||
let physical_table_info = self.data.physical_table_info.as_ref().unwrap();
|
||||
|
||||
// Generates new table info
|
||||
@@ -45,10 +46,7 @@ impl AlterLogicalTablesProcedure {
|
||||
// Updates physical table's metadata
|
||||
self.context
|
||||
.table_metadata_manager
|
||||
.update_table_info(
|
||||
DeserializedValueWithBytes::from_inner(physical_table_info.clone()),
|
||||
new_raw_table_info,
|
||||
)
|
||||
.update_table_info(physical_table_info, new_raw_table_info)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
@@ -77,7 +75,9 @@ impl AlterLogicalTablesProcedure {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn build_update_metadata(&self) -> Result<Vec<(TableInfoValue, RawTableInfo)>> {
|
||||
pub(crate) fn build_update_metadata(
|
||||
&self,
|
||||
) -> Result<Vec<(DeserializedValueWithBytes<TableInfoValue>, RawTableInfo)>> {
|
||||
let mut table_info_values_to_update = Vec::with_capacity(self.data.tasks.len());
|
||||
for (task, table) in self
|
||||
.data
|
||||
@@ -94,8 +94,8 @@ impl AlterLogicalTablesProcedure {
|
||||
fn build_new_table_info(
|
||||
&self,
|
||||
task: &AlterTableTask,
|
||||
table: &TableInfoValue,
|
||||
) -> Result<(TableInfoValue, RawTableInfo)> {
|
||||
table: &DeserializedValueWithBytes<TableInfoValue>,
|
||||
) -> Result<(DeserializedValueWithBytes<TableInfoValue>, RawTableInfo)> {
|
||||
// Builds new_meta
|
||||
let table_info = TableInfo::try_from(table.table_info.clone())
|
||||
.context(error::ConvertRawTableInfoSnafu)?;
|
||||
|
||||
@@ -12,52 +12,49 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod check;
|
||||
mod metadata;
|
||||
mod region_request;
|
||||
mod update_metadata;
|
||||
|
||||
use std::vec;
|
||||
|
||||
use api::v1::alter_expr::Kind;
|
||||
use api::v1::region::{
|
||||
alter_request, region_request, AddColumn, AddColumns, AlterRequest, DropColumn, DropColumns,
|
||||
RegionColumnDef, RegionRequest, RegionRequestHeader,
|
||||
};
|
||||
use api::v1::{AlterExpr, RenameTable};
|
||||
use api::v1::RenameTable;
|
||||
use async_trait::async_trait;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_grpc_expr::alter_expr_to_request;
|
||||
use common_procedure::error::{FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu};
|
||||
use common_procedure::{
|
||||
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure, Status, StringKey,
|
||||
};
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use common_telemetry::{debug, info};
|
||||
use futures::future;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::storage::{ColumnId, RegionId};
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::RegionId;
|
||||
use strum::AsRefStr;
|
||||
use table::metadata::{RawTableInfo, TableId, TableInfo};
|
||||
use table::requests::AlterKind;
|
||||
use table::metadata::{RawTableInfo, TableId};
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use crate::cache_invalidator::Context;
|
||||
use crate::ddl::utils::add_peer_context_if_needed;
|
||||
use crate::ddl::DdlContext;
|
||||
use crate::error::{self, ConvertAlterTableRequestSnafu, Error, InvalidProtoMsgSnafu, Result};
|
||||
use crate::error::{Error, Result};
|
||||
use crate::instruction::CacheIdent;
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::DeserializedValueWithBytes;
|
||||
use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
|
||||
use crate::metrics;
|
||||
use crate::rpc::ddl::AlterTableTask;
|
||||
use crate::rpc::router::{find_leader_regions, find_leaders};
|
||||
use crate::table_name::TableName;
|
||||
|
||||
/// The alter table procedure
|
||||
pub struct AlterTableProcedure {
|
||||
// The runtime context.
|
||||
context: DdlContext,
|
||||
// The serialized data.
|
||||
data: AlterTableData,
|
||||
/// proto alter Kind for adding/dropping columns.
|
||||
kind: Option<alter_request::Kind>,
|
||||
}
|
||||
|
||||
impl AlterTableProcedure {
|
||||
@@ -65,123 +62,36 @@ impl AlterTableProcedure {
|
||||
|
||||
pub fn new(
|
||||
cluster_id: u64,
|
||||
table_id: TableId,
|
||||
task: AlterTableTask,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
context: DdlContext,
|
||||
) -> Result<Self> {
|
||||
let alter_kind = task
|
||||
.alter_table
|
||||
.kind
|
||||
.as_ref()
|
||||
.context(InvalidProtoMsgSnafu {
|
||||
err_msg: "'kind' is absent",
|
||||
})?;
|
||||
let (kind, next_column_id) =
|
||||
create_proto_alter_kind(&table_info_value.table_info, alter_kind)?;
|
||||
|
||||
debug!(
|
||||
"New AlterTableProcedure, kind: {:?}, next_column_id: {:?}",
|
||||
kind, next_column_id
|
||||
);
|
||||
|
||||
task.validate()?;
|
||||
Ok(Self {
|
||||
context,
|
||||
data: AlterTableData::new(task, table_info_value, cluster_id, next_column_id),
|
||||
kind,
|
||||
data: AlterTableData::new(task, table_id, cluster_id),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn from_json(json: &str, context: DdlContext) -> ProcedureResult<Self> {
|
||||
let data: AlterTableData = serde_json::from_str(json).context(FromJsonSnafu)?;
|
||||
let alter_kind = data
|
||||
.task
|
||||
.alter_table
|
||||
.kind
|
||||
.as_ref()
|
||||
.context(InvalidProtoMsgSnafu {
|
||||
err_msg: "'kind' is absent",
|
||||
})
|
||||
.map_err(ProcedureError::external)?;
|
||||
let (kind, next_column_id) =
|
||||
create_proto_alter_kind(&data.table_info_value.table_info, alter_kind)
|
||||
.map_err(ProcedureError::external)?;
|
||||
assert_eq!(data.next_column_id, next_column_id);
|
||||
|
||||
Ok(AlterTableProcedure {
|
||||
context,
|
||||
data,
|
||||
kind,
|
||||
})
|
||||
Ok(AlterTableProcedure { context, data })
|
||||
}
|
||||
|
||||
// Checks whether the table exists.
|
||||
async fn on_prepare(&mut self) -> Result<Status> {
|
||||
let alter_expr = &self.alter_expr();
|
||||
let catalog = &alter_expr.catalog_name;
|
||||
let schema = &alter_expr.schema_name;
|
||||
|
||||
let alter_kind = self.alter_kind()?;
|
||||
let manager = &self.context.table_metadata_manager;
|
||||
|
||||
if let Kind::RenameTable(RenameTable { new_table_name }) = alter_kind {
|
||||
let new_table_name_key = TableNameKey::new(catalog, schema, new_table_name);
|
||||
|
||||
let exist = manager
|
||||
.table_name_manager()
|
||||
.exists(new_table_name_key)
|
||||
.await?;
|
||||
|
||||
ensure!(
|
||||
!exist,
|
||||
error::TableAlreadyExistsSnafu {
|
||||
table_name: TableName::from(new_table_name_key).to_string(),
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
let table_name_key = TableNameKey::new(catalog, schema, &alter_expr.table_name);
|
||||
|
||||
let exist = manager.table_name_manager().exists(table_name_key).await?;
|
||||
|
||||
ensure!(
|
||||
exist,
|
||||
error::TableNotFoundSnafu {
|
||||
table_name: TableName::from(table_name_key).to_string()
|
||||
}
|
||||
);
|
||||
|
||||
pub(crate) async fn on_prepare(&mut self) -> Result<Status> {
|
||||
self.check_alter().await?;
|
||||
self.fill_table_info().await?;
|
||||
// Safety: Checked in `AlterTableProcedure::new`.
|
||||
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
|
||||
if matches!(alter_kind, Kind::RenameTable { .. }) {
|
||||
self.data.state = AlterTableState::UpdateMetadata;
|
||||
} else {
|
||||
self.data.state = AlterTableState::SubmitAlterRegionRequests;
|
||||
};
|
||||
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
fn alter_expr(&self) -> &AlterExpr {
|
||||
&self.data.task.alter_table
|
||||
}
|
||||
|
||||
fn alter_kind(&self) -> Result<&Kind> {
|
||||
self.alter_expr()
|
||||
.kind
|
||||
.as_ref()
|
||||
.context(InvalidProtoMsgSnafu {
|
||||
err_msg: "'kind' is absent",
|
||||
})
|
||||
}
|
||||
|
||||
pub fn create_alter_region_request(&self, region_id: RegionId) -> Result<AlterRequest> {
|
||||
let table_info = self.data.table_info();
|
||||
|
||||
Ok(AlterRequest {
|
||||
region_id: region_id.as_u64(),
|
||||
schema_version: table_info.ident.version,
|
||||
kind: self.kind.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn submit_alter_region_requests(&mut self) -> Result<Status> {
|
||||
let table_id = self.data.table_id();
|
||||
let (_, physical_table_route) = self
|
||||
@@ -200,14 +110,7 @@ impl AlterTableProcedure {
|
||||
|
||||
for region in regions {
|
||||
let region_id = RegionId::new(table_id, region);
|
||||
let request = self.create_alter_region_request(region_id)?;
|
||||
let request = RegionRequest {
|
||||
header: Some(RegionRequestHeader {
|
||||
tracing_context: TracingContext::from_current_span().to_w3c(),
|
||||
..Default::default()
|
||||
}),
|
||||
body: Some(region_request::Body::Alter(request)),
|
||||
};
|
||||
let request = self.make_alter_region_request(region_id)?;
|
||||
debug!("Submitting {request:?} to {datanode}");
|
||||
|
||||
let datanode = datanode.clone();
|
||||
@@ -238,91 +141,39 @@ impl AlterTableProcedure {
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
/// Update table metadata for rename table operation.
|
||||
async fn on_update_metadata_for_rename(&self, new_table_name: String) -> Result<()> {
|
||||
let table_metadata_manager = &self.context.table_metadata_manager;
|
||||
|
||||
let current_table_info_value = self.data.table_info_value.clone();
|
||||
|
||||
table_metadata_manager
|
||||
.rename_table(current_table_info_value, new_table_name)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn on_update_metadata_for_alter(&self, new_table_info: RawTableInfo) -> Result<()> {
|
||||
let table_metadata_manager = &self.context.table_metadata_manager;
|
||||
let current_table_info_value = self.data.table_info_value.clone();
|
||||
|
||||
table_metadata_manager
|
||||
.update_table_info(current_table_info_value, new_table_info)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn build_new_table_info(&self) -> Result<TableInfo> {
|
||||
// Builds new_meta
|
||||
let table_info = TableInfo::try_from(self.data.table_info().clone())
|
||||
.context(error::ConvertRawTableInfoSnafu)?;
|
||||
|
||||
let table_ref = self.data.table_ref();
|
||||
|
||||
let request = alter_expr_to_request(self.data.table_id(), self.alter_expr().clone())
|
||||
.context(ConvertAlterTableRequestSnafu)?;
|
||||
|
||||
let new_meta = table_info
|
||||
.meta
|
||||
.builder_with_alter_kind(table_ref.table, &request.alter_kind, false)
|
||||
.context(error::TableSnafu)?
|
||||
.build()
|
||||
.with_context(|_| error::BuildTableMetaSnafu {
|
||||
table_name: table_ref.table,
|
||||
})?;
|
||||
|
||||
let mut new_info = table_info.clone();
|
||||
new_info.meta = new_meta;
|
||||
new_info.ident.version = table_info.ident.version + 1;
|
||||
if let Some(column_id) = self.data.next_column_id {
|
||||
new_info.meta.next_column_id = new_info.meta.next_column_id.max(column_id);
|
||||
}
|
||||
|
||||
if let AlterKind::RenameTable { new_table_name } = &request.alter_kind {
|
||||
new_info.name = new_table_name.to_string();
|
||||
}
|
||||
|
||||
Ok(new_info)
|
||||
}
|
||||
|
||||
/// Update table metadata.
|
||||
async fn on_update_metadata(&mut self) -> Result<Status> {
|
||||
pub(crate) async fn on_update_metadata(&mut self) -> Result<Status> {
|
||||
let table_id = self.data.table_id();
|
||||
let table_ref = self.data.table_ref();
|
||||
let new_info = self.build_new_table_info()?;
|
||||
// Safety: checked before.
|
||||
let table_info_value = self.data.table_info_value.as_ref().unwrap();
|
||||
let new_info = self.build_new_table_info(&table_info_value.table_info)?;
|
||||
|
||||
debug!(
|
||||
"starting update table: {} metadata, new table info {:?}",
|
||||
"Starting update table: {} metadata, new table info {:?}",
|
||||
table_ref.to_string(),
|
||||
new_info
|
||||
);
|
||||
|
||||
if let Kind::RenameTable(RenameTable { new_table_name }) = self.alter_kind()? {
|
||||
self.on_update_metadata_for_rename(new_table_name.to_string())
|
||||
// Safety: Checked in `AlterTableProcedure::new`.
|
||||
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
|
||||
if let Kind::RenameTable(RenameTable { new_table_name }) = alter_kind {
|
||||
self.on_update_metadata_for_rename(new_table_name.to_string(), table_info_value)
|
||||
.await?;
|
||||
} else {
|
||||
self.on_update_metadata_for_alter(new_info.into()).await?;
|
||||
self.on_update_metadata_for_alter(new_info.into(), table_info_value)
|
||||
.await?;
|
||||
}
|
||||
|
||||
info!("Updated table metadata for table {table_ref}, table_id: {table_id}");
|
||||
|
||||
self.data.state = AlterTableState::InvalidateTableCache;
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
/// Broadcasts the invalidating table cache instructions.
|
||||
async fn on_broadcast(&mut self) -> Result<Status> {
|
||||
let alter_kind = self.alter_kind()?;
|
||||
// Safety: Checked in `AlterTableProcedure::new`.
|
||||
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
|
||||
let cache_invalidator = &self.context.cache_invalidator;
|
||||
let cache_keys = if matches!(alter_kind, Kind::RenameTable { .. }) {
|
||||
vec![CacheIdent::TableName(self.data.table_ref().into())]
|
||||
@@ -348,7 +199,9 @@ impl AlterTableProcedure {
|
||||
lock_key.push(SchemaLock::read(table_ref.catalog, table_ref.schema).into());
|
||||
lock_key.push(TableLock::Write(table_id).into());
|
||||
|
||||
if let Ok(Kind::RenameTable(RenameTable { new_table_name })) = self.alter_kind() {
|
||||
// Safety: Checked in `AlterTableProcedure::new`.
|
||||
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
|
||||
if let Kind::RenameTable(RenameTable { new_table_name }) = alter_kind {
|
||||
lock_key.push(
|
||||
TableNameLock::new(table_ref.catalog, table_ref.schema, new_table_name).into(),
|
||||
)
|
||||
@@ -403,8 +256,9 @@ impl Procedure for AlterTableProcedure {
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, AsRefStr)]
|
||||
enum AlterTableState {
|
||||
/// Prepares to alter the table
|
||||
/// Prepares to alter the table.
|
||||
Prepare,
|
||||
/// Sends alter region requests to Datanode.
|
||||
SubmitAlterRegionRequests,
|
||||
/// Updates table metadata.
|
||||
UpdateMetadata,
|
||||
@@ -412,30 +266,25 @@ enum AlterTableState {
|
||||
InvalidateTableCache,
|
||||
}
|
||||
|
||||
// The serialized data of alter table.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AlterTableData {
|
||||
cluster_id: u64,
|
||||
state: AlterTableState,
|
||||
task: AlterTableTask,
|
||||
table_id: TableId,
|
||||
/// Table info value before alteration.
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
/// Next column id of the table if the task adds columns to the table.
|
||||
next_column_id: Option<ColumnId>,
|
||||
table_info_value: Option<DeserializedValueWithBytes<TableInfoValue>>,
|
||||
}
|
||||
|
||||
impl AlterTableData {
|
||||
pub fn new(
|
||||
task: AlterTableTask,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
cluster_id: u64,
|
||||
next_column_id: Option<ColumnId>,
|
||||
) -> Self {
|
||||
pub fn new(task: AlterTableTask, table_id: TableId, cluster_id: u64) -> Self {
|
||||
Self {
|
||||
state: AlterTableState::Prepare,
|
||||
task,
|
||||
table_info_value,
|
||||
table_id,
|
||||
cluster_id,
|
||||
next_column_id,
|
||||
table_info_value: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -444,76 +293,12 @@ impl AlterTableData {
|
||||
}
|
||||
|
||||
fn table_id(&self) -> TableId {
|
||||
self.table_info().ident.table_id
|
||||
self.table_id
|
||||
}
|
||||
|
||||
fn table_info(&self) -> &RawTableInfo {
|
||||
&self.table_info_value.table_info
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates region proto alter kind from `table_info` and `alter_kind`.
|
||||
///
|
||||
/// Returns the kind and next column id if it adds new columns.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if kind is rename.
|
||||
pub fn create_proto_alter_kind(
|
||||
table_info: &RawTableInfo,
|
||||
alter_kind: &Kind,
|
||||
) -> Result<(Option<alter_request::Kind>, Option<ColumnId>)> {
|
||||
match alter_kind {
|
||||
Kind::AddColumns(x) => {
|
||||
let mut next_column_id = table_info.meta.next_column_id;
|
||||
|
||||
let add_columns = x
|
||||
.add_columns
|
||||
.iter()
|
||||
.map(|add_column| {
|
||||
let column_def =
|
||||
add_column
|
||||
.column_def
|
||||
.as_ref()
|
||||
.context(InvalidProtoMsgSnafu {
|
||||
err_msg: "'column_def' is absent",
|
||||
})?;
|
||||
|
||||
let column_id = next_column_id;
|
||||
next_column_id += 1;
|
||||
|
||||
let column_def = RegionColumnDef {
|
||||
column_def: Some(column_def.clone()),
|
||||
column_id,
|
||||
};
|
||||
|
||||
Ok(AddColumn {
|
||||
column_def: Some(column_def),
|
||||
location: add_column.location.clone(),
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok((
|
||||
Some(alter_request::Kind::AddColumns(AddColumns { add_columns })),
|
||||
Some(next_column_id),
|
||||
))
|
||||
}
|
||||
Kind::DropColumns(x) => {
|
||||
let drop_columns = x
|
||||
.drop_columns
|
||||
.iter()
|
||||
.map(|x| DropColumn {
|
||||
name: x.name.clone(),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok((
|
||||
Some(alter_request::Kind::DropColumns(DropColumns {
|
||||
drop_columns,
|
||||
})),
|
||||
None,
|
||||
))
|
||||
}
|
||||
Kind::RenameTable(_) => Ok((None, None)),
|
||||
fn table_info(&self) -> Option<&RawTableInfo> {
|
||||
self.table_info_value
|
||||
.as_ref()
|
||||
.map(|value| &value.table_info)
|
||||
}
|
||||
}
|
||||
|
||||
62  src/common/meta/src/ddl/alter_table/check.rs  Normal file
@@ -0,0 +1,62 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::alter_expr::Kind;
|
||||
use api::v1::RenameTable;
|
||||
use common_catalog::format_full_table_name;
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::error::{self, Result};
|
||||
use crate::key::table_name::TableNameKey;
|
||||
|
||||
impl AlterTableProcedure {
|
||||
/// Checks:
|
||||
/// - The new table name doesn't exist (rename).
|
||||
/// - Table exists.
|
||||
pub(crate) async fn check_alter(&self) -> Result<()> {
|
||||
let alter_expr = &self.data.task.alter_table;
|
||||
let catalog = &alter_expr.catalog_name;
|
||||
let schema = &alter_expr.schema_name;
|
||||
let table_name = &alter_expr.table_name;
|
||||
// Safety: Checked in `AlterTableProcedure::new`.
|
||||
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
|
||||
|
||||
let manager = &self.context.table_metadata_manager;
|
||||
if let Kind::RenameTable(RenameTable { new_table_name }) = alter_kind {
|
||||
let new_table_name_key = TableNameKey::new(catalog, schema, new_table_name);
|
||||
let exists = manager
|
||||
.table_name_manager()
|
||||
.exists(new_table_name_key)
|
||||
.await?;
|
||||
ensure!(
|
||||
!exists,
|
||||
error::TableAlreadyExistsSnafu {
|
||||
table_name: format_full_table_name(catalog, schema, new_table_name),
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
let table_name_key = TableNameKey::new(catalog, schema, table_name);
|
||||
let exists = manager.table_name_manager().exists(table_name_key).await?;
|
||||
ensure!(
|
||||
exists,
|
||||
error::TableNotFoundSnafu {
|
||||
table_name: format_full_table_name(catalog, schema, &alter_expr.table_name),
|
||||
}
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
42  src/common/meta/src/ddl/alter_table/metadata.rs  Normal file
@@ -0,0 +1,42 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_catalog::format_full_table_name;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::error::{self, Result};
|
||||
|
||||
impl AlterTableProcedure {
|
||||
/// Fetches the table info.
|
||||
pub(crate) async fn fill_table_info(&mut self) -> Result<()> {
|
||||
let table_id = self.data.table_id();
|
||||
let alter_expr = &self.data.task.alter_table;
|
||||
let catalog = &alter_expr.catalog_name;
|
||||
let schema = &alter_expr.schema_name;
|
||||
let table_name = &alter_expr.table_name;
|
||||
|
||||
let table_info_value = self
|
||||
.context
|
||||
.table_metadata_manager
|
||||
.table_info_manager()
|
||||
.get(table_id)
|
||||
.await?
|
||||
.with_context(|| error::TableNotFoundSnafu {
|
||||
table_name: format_full_table_name(catalog, schema, table_name),
|
||||
})?;
|
||||
self.data.table_info_value = Some(table_info_value);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
258  src/common/meta/src/ddl/alter_table/region_request.rs  Normal file
@@ -0,0 +1,258 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::alter_expr::Kind;
|
||||
use api::v1::region::region_request::Body;
|
||||
use api::v1::region::{
|
||||
alter_request, AddColumn, AddColumns, AlterRequest, DropColumn, DropColumns, RegionColumnDef,
|
||||
RegionRequest, RegionRequestHeader,
|
||||
};
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use snafu::OptionExt;
|
||||
use store_api::storage::RegionId;
|
||||
use table::metadata::RawTableInfo;
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::error::{InvalidProtoMsgSnafu, Result};
|
||||
|
||||
impl AlterTableProcedure {
|
||||
/// Makes alter region request.
|
||||
pub(crate) fn make_alter_region_request(&self, region_id: RegionId) -> Result<RegionRequest> {
|
||||
// Safety: Checked in `AlterTableProcedure::new`.
|
||||
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
|
||||
// Safety: checked
|
||||
let table_info = self.data.table_info().unwrap();
|
||||
let kind = create_proto_alter_kind(table_info, alter_kind)?;
|
||||
|
||||
Ok(RegionRequest {
|
||||
header: Some(RegionRequestHeader {
|
||||
tracing_context: TracingContext::from_current_span().to_w3c(),
|
||||
..Default::default()
|
||||
}),
|
||||
body: Some(Body::Alter(AlterRequest {
|
||||
region_id: region_id.as_u64(),
|
||||
schema_version: table_info.ident.version,
|
||||
kind,
|
||||
})),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates region proto alter kind from `table_info` and `alter_kind`.
|
||||
///
|
||||
/// Returns the kind and next column id if it adds new columns.
|
||||
fn create_proto_alter_kind(
|
||||
table_info: &RawTableInfo,
|
||||
alter_kind: &Kind,
|
||||
) -> Result<Option<alter_request::Kind>> {
|
||||
match alter_kind {
|
||||
Kind::AddColumns(x) => {
|
||||
let mut next_column_id = table_info.meta.next_column_id;
|
||||
|
||||
let add_columns = x
|
||||
.add_columns
|
||||
.iter()
|
||||
.map(|add_column| {
|
||||
let column_def =
|
||||
add_column
|
||||
.column_def
|
||||
.as_ref()
|
||||
.context(InvalidProtoMsgSnafu {
|
||||
err_msg: "'column_def' is absent",
|
||||
})?;
|
||||
|
||||
let column_id = next_column_id;
|
||||
next_column_id += 1;
|
||||
|
||||
let column_def = RegionColumnDef {
|
||||
column_def: Some(column_def.clone()),
|
||||
column_id,
|
||||
};
|
||||
|
||||
Ok(AddColumn {
|
||||
column_def: Some(column_def),
|
||||
location: add_column.location.clone(),
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok(Some(alter_request::Kind::AddColumns(AddColumns {
|
||||
add_columns,
|
||||
})))
|
||||
}
|
||||
Kind::DropColumns(x) => {
|
||||
let drop_columns = x
|
||||
.drop_columns
|
||||
.iter()
|
||||
.map(|x| DropColumn {
|
||||
name: x.name.clone(),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(Some(alter_request::Kind::DropColumns(DropColumns {
|
||||
drop_columns,
|
||||
})))
|
||||
}
|
||||
Kind::RenameTable(_) => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::add_column_location::LocationType;
|
||||
use api::v1::alter_expr::Kind;
|
||||
use api::v1::region::region_request::Body;
|
||||
use api::v1::region::RegionColumnDef;
|
||||
use api::v1::{
|
||||
region, AddColumn, AddColumnLocation, AddColumns, AlterExpr, ColumnDataType,
|
||||
ColumnDef as PbColumnDef, SemanticType,
|
||||
};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::ddl::test_util::columns::TestColumnDefBuilder;
|
||||
use crate::ddl::test_util::create_table::{
|
||||
build_raw_table_info_from_expr, TestCreateTableExprBuilder,
|
||||
};
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::ddl::AlterTableTask;
|
||||
use crate::rpc::router::{Region, RegionRoute};
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeManager};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_make_alter_region_request() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let table_id = 1024;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
let table_name = "foo";
|
||||
|
||||
let create_table = TestCreateTableExprBuilder::default()
|
||||
.column_defs([
|
||||
TestColumnDefBuilder::default()
|
||||
.name("ts")
|
||||
.data_type(ColumnDataType::TimestampMillisecond)
|
||||
.semantic_type(SemanticType::Timestamp)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into(),
|
||||
TestColumnDefBuilder::default()
|
||||
.name("host")
|
||||
.data_type(ColumnDataType::String)
|
||||
.semantic_type(SemanticType::Tag)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into(),
|
||||
TestColumnDefBuilder::default()
|
||||
.name("cpu")
|
||||
.data_type(ColumnDataType::Float64)
|
||||
.semantic_type(SemanticType::Field)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into(),
|
||||
])
|
||||
.table_id(table_id)
|
||||
.time_index("ts")
|
||||
.primary_keys(["host".into()])
|
||||
.table_name(table_name)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into();
|
||||
let table_info = build_raw_table_info_from_expr(&create_table);
|
||||
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
table_info,
|
||||
TableRouteValue::physical(vec![RegionRoute {
|
||||
region: Region::new_test(region_id),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
}]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let task = AlterTableTask {
|
||||
alter_table: AlterExpr {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: table_name.to_string(),
|
||||
kind: Some(Kind::AddColumns(AddColumns {
|
||||
add_columns: vec![AddColumn {
|
||||
column_def: Some(PbColumnDef {
|
||||
name: "my_tag3".to_string(),
|
||||
data_type: ColumnDataType::String as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: b"hello".to_vec(),
|
||||
semantic_type: SemanticType::Tag as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
}),
|
||||
location: Some(AddColumnLocation {
|
||||
location_type: LocationType::After as i32,
|
||||
after_column_name: "my_tag2".to_string(),
|
||||
}),
|
||||
}],
|
||||
})),
|
||||
},
|
||||
};
|
||||
|
||||
let mut procedure =
|
||||
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
let Some(Body::Alter(alter_region_request)) =
|
||||
procedure.make_alter_region_request(region_id).unwrap().body
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
assert_eq!(alter_region_request.region_id, region_id.as_u64());
|
||||
assert_eq!(alter_region_request.schema_version, 1);
|
||||
assert_eq!(
|
||||
alter_region_request.kind,
|
||||
Some(region::alter_request::Kind::AddColumns(
|
||||
region::AddColumns {
|
||||
add_columns: vec![region::AddColumn {
|
||||
column_def: Some(RegionColumnDef {
|
||||
column_def: Some(PbColumnDef {
|
||||
name: "my_tag3".to_string(),
|
||||
data_type: ColumnDataType::String as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: b"hello".to_vec(),
|
||||
semantic_type: SemanticType::Tag as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
}),
|
||||
column_id: 3,
|
||||
}),
|
||||
location: Some(AddColumnLocation {
|
||||
location_type: LocationType::After as i32,
|
||||
after_column_name: "my_tag2".to_string(),
|
||||
}),
|
||||
}]
|
||||
}
|
||||
))
|
||||
);
|
||||
}
|
||||
}
|
||||
87  src/common/meta/src/ddl/alter_table/update_metadata.rs  Normal file
@@ -0,0 +1,87 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_grpc_expr::alter_expr_to_request;
|
||||
use snafu::ResultExt;
|
||||
use table::metadata::{RawTableInfo, TableInfo};
|
||||
use table::requests::AlterKind;
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::error::{self, Result};
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::DeserializedValueWithBytes;
|
||||
|
||||
impl AlterTableProcedure {
|
||||
/// Builds new_meta
|
||||
pub(crate) fn build_new_table_info(&self, table_info: &RawTableInfo) -> Result<TableInfo> {
|
||||
let table_info =
|
||||
TableInfo::try_from(table_info.clone()).context(error::ConvertRawTableInfoSnafu)?;
|
||||
let table_ref = self.data.table_ref();
|
||||
let alter_expr = self.data.task.alter_table.clone();
|
||||
let request = alter_expr_to_request(self.data.table_id(), alter_expr)
|
||||
.context(error::ConvertAlterTableRequestSnafu)?;
|
||||
|
||||
let new_meta = table_info
|
||||
.meta
|
||||
.builder_with_alter_kind(table_ref.table, &request.alter_kind, false)
|
||||
.context(error::TableSnafu)?
|
||||
.build()
|
||||
.with_context(|_| error::BuildTableMetaSnafu {
|
||||
table_name: table_ref.table,
|
||||
})?;
|
||||
|
||||
let mut new_info = table_info.clone();
|
||||
new_info.meta = new_meta;
|
||||
new_info.ident.version = table_info.ident.version + 1;
|
||||
match request.alter_kind {
|
||||
AlterKind::AddColumns { columns } => {
|
||||
new_info.meta.next_column_id += columns.len() as u32;
|
||||
}
|
||||
AlterKind::RenameTable { new_table_name } => {
|
||||
new_info.name = new_table_name.to_string();
|
||||
}
|
||||
AlterKind::DropColumns { .. } | AlterKind::ChangeColumnTypes { .. } => {}
|
||||
}
|
||||
|
||||
Ok(new_info)
|
||||
}
|
||||
|
||||
/// Updates table metadata for rename table operation.
|
||||
pub(crate) async fn on_update_metadata_for_rename(
|
||||
&self,
|
||||
new_table_name: String,
|
||||
current_table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
|
||||
) -> Result<()> {
|
||||
let table_metadata_manager = &self.context.table_metadata_manager;
|
||||
table_metadata_manager
|
||||
.rename_table(current_table_info_value, new_table_name)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Updates table metadata for alter table operation.
|
||||
pub(crate) async fn on_update_metadata_for_alter(
|
||||
&self,
|
||||
new_table_info: RawTableInfo,
|
||||
current_table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
|
||||
) -> Result<()> {
|
||||
let table_metadata_manager = &self.context.table_metadata_manager;
|
||||
table_metadata_manager
|
||||
.update_table_info(current_table_info_value, new_table_info)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
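The `build_new_table_info` shown above bumps the table ident version on every alter and advances `next_column_id` by the number of added columns. Below is a toy mirror of that bookkeeping, using stand-in types rather than the real `TableInfo`/`AlterKind`:

```rust
// Toy mirror of the bookkeeping in build_new_table_info; illustrative only,
// the struct and function here are not part of the real crate.
struct ToyMeta {
    next_column_id: u32,
    version: u64,
}

fn apply_add_columns(meta: &ToyMeta, added_columns: u32) -> ToyMeta {
    ToyMeta {
        // AlterKind::AddColumns advances next_column_id by the number of new columns.
        next_column_id: meta.next_column_id + added_columns,
        // Every successful alter bumps the ident version by one.
        version: meta.version + 1,
    }
}

fn main() {
    let before = ToyMeta { next_column_id: 3, version: 1 };
    let after = apply_add_columns(&before, 2);
    assert_eq!((after.next_column_id, after.version), (5, 2));
}
```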
@@ -61,7 +61,7 @@ impl CreateLogicalTablesProcedure {
|
||||
// Update physical table's metadata
|
||||
self.context
|
||||
.table_metadata_manager
|
||||
.update_table_info(physical_table_info, new_table_info)
|
||||
.update_table_info(&physical_table_info, new_table_info)
|
||||
.await?;
|
||||
|
||||
// Invalid physical table cache
|
||||
|
||||
@@ -271,7 +271,7 @@ impl CreateTableProcedure {
|
||||
///
|
||||
/// Abort(not-retry):
|
||||
/// - Failed to create table metadata.
|
||||
async fn on_create_metadata(&self) -> Result<Status> {
|
||||
async fn on_create_metadata(&mut self) -> Result<Status> {
|
||||
let table_id = self.table_id();
|
||||
let manager = &self.context.table_metadata_manager;
|
||||
|
||||
@@ -285,6 +285,7 @@ impl CreateTableProcedure {
|
||||
.await?;
|
||||
info!("Created table metadata for table {table_id}");
|
||||
|
||||
self.creator.opening_regions.clear();
|
||||
Ok(Status::done_with_output(table_id))
|
||||
}
|
||||
}
|
||||
@@ -385,7 +386,7 @@ impl TableCreator {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, AsRefStr)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, AsRefStr, PartialEq)]
|
||||
pub enum CreateTableState {
|
||||
/// Prepares to create the table
|
||||
Prepare,
|
||||
|
||||
@@ -76,6 +76,7 @@ impl DropDatabaseCursor {
|
||||
.await?;
|
||||
Ok((
|
||||
Box::new(DropDatabaseExecutor::new(
|
||||
table_id,
|
||||
table_id,
|
||||
TableName::new(&ctx.catalog, &ctx.schema, &table_name),
|
||||
table_route.region_routes,
|
||||
@@ -86,6 +87,7 @@ impl DropDatabaseCursor {
|
||||
}
|
||||
(DropTableTarget::Physical, TableRouteValue::Physical(table_route)) => Ok((
|
||||
Box::new(DropDatabaseExecutor::new(
|
||||
table_id,
|
||||
table_id,
|
||||
TableName::new(&ctx.catalog, &ctx.schema, &table_name),
|
||||
table_route.region_routes,
|
||||
@@ -163,7 +165,7 @@ mod tests {
|
||||
async fn test_next_without_logical_tables() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
create_physical_table(ddl_context.clone(), 0, "phy").await;
|
||||
create_physical_table(&ddl_context, 0, "phy").await;
|
||||
// It always starts from Logical
|
||||
let mut state = DropDatabaseCursor::new(DropTableTarget::Logical);
|
||||
let mut ctx = DropDatabaseContext {
|
||||
@@ -197,7 +199,7 @@ mod tests {
|
||||
async fn test_next_with_logical_tables() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let physical_table_id = create_physical_table(ddl_context.clone(), 0, "phy").await;
|
||||
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
|
||||
create_logical_table(ddl_context.clone(), 0, physical_table_id, "metric_0").await;
|
||||
// It always starts from Logical
|
||||
let mut state = DropDatabaseCursor::new(DropTableTarget::Logical);
|
||||
@@ -220,7 +222,7 @@ mod tests {
|
||||
.get_physical_table_route(physical_table_id)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(table_route.region_routes, executor.region_routes);
|
||||
assert_eq!(table_route.region_routes, executor.physical_region_routes);
|
||||
assert_eq!(executor.target, DropTableTarget::Logical);
|
||||
}
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ use crate::ddl::drop_database::State;
|
||||
use crate::ddl::drop_table::executor::DropTableExecutor;
|
||||
use crate::ddl::DdlContext;
|
||||
use crate::error::{self, Result};
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::region_keeper::OperatingRegionGuard;
|
||||
use crate::rpc::router::{operating_leader_regions, RegionRoute};
|
||||
use crate::table_name::TableName;
|
||||
@@ -33,8 +34,10 @@ use crate::table_name::TableName;
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct DropDatabaseExecutor {
|
||||
table_id: TableId,
|
||||
physical_table_id: TableId,
|
||||
table_name: TableName,
|
||||
pub(crate) region_routes: Vec<RegionRoute>,
|
||||
/// The physical table region routes.
|
||||
pub(crate) physical_region_routes: Vec<RegionRoute>,
|
||||
pub(crate) target: DropTableTarget,
|
||||
#[serde(skip)]
|
||||
dropping_regions: Vec<OperatingRegionGuard>,
|
||||
@@ -44,14 +47,16 @@ impl DropDatabaseExecutor {
|
||||
/// Returns a new [DropDatabaseExecutor].
|
||||
pub fn new(
|
||||
table_id: TableId,
|
||||
physical_table_id: TableId,
|
||||
table_name: TableName,
|
||||
region_routes: Vec<RegionRoute>,
|
||||
physical_region_routes: Vec<RegionRoute>,
|
||||
target: DropTableTarget,
|
||||
) -> Self {
|
||||
Self {
|
||||
table_name,
|
||||
table_id,
|
||||
region_routes,
|
||||
physical_table_id,
|
||||
table_name,
|
||||
physical_region_routes,
|
||||
target,
|
||||
dropping_regions: vec![],
|
||||
}
|
||||
@@ -60,7 +65,7 @@ impl DropDatabaseExecutor {
|
||||
|
||||
impl DropDatabaseExecutor {
|
||||
fn register_dropping_regions(&mut self, ddl_ctx: &DdlContext) -> Result<()> {
|
||||
let dropping_regions = operating_leader_regions(&self.region_routes);
|
||||
let dropping_regions = operating_leader_regions(&self.physical_region_routes);
|
||||
let mut dropping_region_guards = Vec::with_capacity(dropping_regions.len());
|
||||
for (region_id, datanode_id) in dropping_regions {
|
||||
let guard = ddl_ctx
|
||||
@@ -87,12 +92,18 @@ impl State for DropDatabaseExecutor {
|
||||
) -> Result<(Box<dyn State>, Status)> {
|
||||
self.register_dropping_regions(ddl_ctx)?;
|
||||
let executor = DropTableExecutor::new(self.table_name.clone(), self.table_id, true);
|
||||
// Deletes metadata for table permanently.
|
||||
let table_route_value = TableRouteValue::new(
|
||||
self.table_id,
|
||||
self.physical_table_id,
|
||||
self.physical_region_routes.clone(),
|
||||
);
|
||||
executor
|
||||
.on_remove_metadata(ddl_ctx, &self.region_routes)
|
||||
.on_destroy_metadata(ddl_ctx, &table_route_value)
|
||||
.await?;
|
||||
executor.invalidate_table_cache(ddl_ctx).await?;
|
||||
executor
|
||||
.on_drop_regions(ddl_ctx, &self.region_routes)
|
||||
.on_drop_regions(ddl_ctx, &self.physical_region_routes)
|
||||
.await?;
|
||||
info!("Table: {}({}) is dropped", self.table_name, self.table_id);
|
||||
|
||||
@@ -111,18 +122,20 @@ impl State for DropDatabaseExecutor {
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::region::RegionResponse;
|
||||
use api::v1::region::{QueryRequest, RegionRequest};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
|
||||
use crate::datanode_manager::HandleResponse;
|
||||
use crate::ddl::drop_database::cursor::DropDatabaseCursor;
|
||||
use crate::ddl::drop_database::executor::DropDatabaseExecutor;
|
||||
use crate::ddl::drop_database::{DropDatabaseContext, DropTableTarget, State};
|
||||
use crate::ddl::test_util::{create_logical_table, create_physical_table};
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::key::datanode_table::DatanodeTableKey;
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::router::region_distribution;
|
||||
use crate::table_name::TableName;
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeHandler, MockDatanodeManager};
|
||||
|
||||
@@ -131,8 +144,8 @@ mod tests {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for NaiveDatanodeHandler {
|
||||
async fn handle(&self, _peer: &Peer, _request: RegionRequest) -> Result<HandleResponse> {
|
||||
Ok(HandleResponse::new(0))
|
||||
async fn handle(&self, _peer: &Peer, _request: RegionRequest) -> Result<RegionResponse> {
|
||||
Ok(RegionResponse::new(0))
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
@@ -148,7 +161,7 @@ mod tests {
|
||||
async fn test_next_with_physical_table() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let physical_table_id = create_physical_table(ddl_context.clone(), 0, "phy").await;
|
||||
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
|
||||
let (_, table_route) = ddl_context
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
@@ -157,6 +170,7 @@ mod tests {
|
||||
.unwrap();
|
||||
{
|
||||
let mut state = DropDatabaseExecutor::new(
|
||||
physical_table_id,
|
||||
physical_table_id,
|
||||
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "phy"),
|
||||
table_route.region_routes.clone(),
|
||||
@@ -181,9 +195,10 @@ mod tests {
|
||||
tables: None,
|
||||
};
|
||||
let mut state = DropDatabaseExecutor::new(
|
||||
physical_table_id,
|
||||
physical_table_id,
|
||||
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "phy"),
|
||||
table_route.region_routes,
|
||||
table_route.region_routes.clone(),
|
||||
DropTableTarget::Physical,
|
||||
);
|
||||
let (state, status) = state.next(&ddl_context, &mut ctx).await.unwrap();
|
||||
@@ -196,7 +211,7 @@ mod tests {
|
||||
async fn test_next_logical_table() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let physical_table_id = create_physical_table(ddl_context.clone(), 0, "phy").await;
|
||||
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
|
||||
create_logical_table(ddl_context.clone(), 0, physical_table_id, "metric").await;
|
||||
let logical_table_id = physical_table_id + 1;
|
||||
let (_, table_route) = ddl_context
|
||||
@@ -207,6 +222,7 @@ mod tests {
|
||||
.unwrap();
|
||||
{
|
||||
let mut state = DropDatabaseExecutor::new(
|
||||
logical_table_id,
|
||||
physical_table_id,
|
||||
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "metric"),
|
||||
table_route.region_routes.clone(),
|
||||
@@ -231,8 +247,9 @@ mod tests {
|
||||
tables: None,
|
||||
};
|
||||
let mut state = DropDatabaseExecutor::new(
|
||||
logical_table_id,
|
||||
physical_table_id,
|
||||
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "phy"),
|
||||
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "metric"),
|
||||
table_route.region_routes,
|
||||
DropTableTarget::Logical,
|
||||
);
|
||||
@@ -240,6 +257,33 @@ mod tests {
|
||||
assert!(!status.need_persist());
|
||||
let cursor = state.as_any().downcast_ref::<DropDatabaseCursor>().unwrap();
|
||||
assert_eq!(cursor.target, DropTableTarget::Logical);
|
||||
// Checks table info
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.table_info_manager()
|
||||
.get(physical_table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
// Checks table route
|
||||
let table_route = ddl_context
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
.table_route_storage()
|
||||
.get(physical_table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let region_routes = table_route.region_routes().unwrap();
|
||||
for datanode_id in region_distribution(region_routes).into_keys() {
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.datanode_table_manager()
|
||||
.get(&DatanodeTableKey::new(datanode_id, physical_table_id))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -247,7 +291,7 @@ mod tests {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for RetryErrorDatanodeHandler {
|
||||
async fn handle(&self, _peer: &Peer, _request: RegionRequest) -> Result<HandleResponse> {
|
||||
async fn handle(&self, _peer: &Peer, _request: RegionRequest) -> Result<RegionResponse> {
|
||||
Err(Error::RetryLater {
|
||||
source: BoxedError::new(
|
||||
error::UnexpectedSnafu {
|
||||
@@ -271,7 +315,7 @@ mod tests {
|
||||
async fn test_next_retryable_err() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let physical_table_id = create_physical_table(ddl_context.clone(), 0, "phy").await;
|
||||
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
|
||||
let (_, table_route) = ddl_context
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
@@ -279,6 +323,7 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
let mut state = DropDatabaseExecutor::new(
|
||||
physical_table_id,
|
||||
physical_table_id,
|
||||
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "phy"),
|
||||
table_route.region_routes,
|
||||
|
||||
@@ -18,10 +18,12 @@ use common_procedure::Status;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::end::DropDatabaseEnd;
|
||||
use crate::cache_invalidator::Context;
|
||||
use crate::ddl::drop_database::{DropDatabaseContext, State};
|
||||
use crate::ddl::DdlContext;
|
||||
use crate::error::Result;
|
||||
use crate::key::schema_name::SchemaNameKey;
|
||||
use crate::instruction::CacheIdent;
|
||||
use crate::key::schema_name::{SchemaName, SchemaNameKey};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct DropDatabaseRemoveMetadata;
|
||||
@@ -40,7 +42,53 @@ impl State for DropDatabaseRemoveMetadata {
|
||||
.delete(SchemaNameKey::new(&ctx.catalog, &ctx.schema))
|
||||
.await?;
|
||||
|
||||
return Ok((Box::new(DropDatabaseEnd), Status::done()));
|
||||
return Ok((Box::new(DropMetadataBroadcast), Status::executing(true)));
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct DropMetadataBroadcast;
|
||||
|
||||
impl DropMetadataBroadcast {
|
||||
/// Invalidates frontend caches
|
||||
async fn invalidate_schema_cache(
|
||||
&self,
|
||||
ddl_ctx: &DdlContext,
|
||||
db_ctx: &mut DropDatabaseContext,
|
||||
) -> Result<()> {
|
||||
let cache_invalidator = &ddl_ctx.cache_invalidator;
|
||||
let ctx = Context {
|
||||
subject: Some("Invalidate schema cache by dropping database".to_string()),
|
||||
};
|
||||
|
||||
cache_invalidator
|
||||
.invalidate(
|
||||
&ctx,
|
||||
vec![CacheIdent::SchemaName(SchemaName {
|
||||
catalog_name: db_ctx.catalog.clone(),
|
||||
schema_name: db_ctx.schema.clone(),
|
||||
})],
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[typetag::serde]
|
||||
impl State for DropMetadataBroadcast {
|
||||
async fn next(
|
||||
&mut self,
|
||||
ddl_ctx: &DdlContext,
|
||||
ctx: &mut DropDatabaseContext,
|
||||
) -> Result<(Box<dyn State>, Status)> {
|
||||
self.invalidate_schema_cache(ddl_ctx, ctx).await?;
|
||||
Ok((Box::new(DropDatabaseEnd), Status::done()))
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
@@ -53,7 +101,7 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::ddl::drop_database::end::DropDatabaseEnd;
|
||||
use crate::ddl::drop_database::metadata::DropDatabaseRemoveMetadata;
|
||||
use crate::ddl::drop_database::metadata::{DropDatabaseRemoveMetadata, DropMetadataBroadcast};
|
||||
use crate::ddl::drop_database::{DropDatabaseContext, State};
|
||||
use crate::key::schema_name::SchemaNameKey;
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeManager};
|
||||
@@ -76,14 +124,23 @@ mod tests {
|
||||
tables: None,
|
||||
};
|
||||
let (state, status) = state.next(&ddl_context, &mut ctx).await.unwrap();
|
||||
state.as_any().downcast_ref::<DropDatabaseEnd>().unwrap();
|
||||
assert!(status.is_done());
|
||||
state
|
||||
.as_any()
|
||||
.downcast_ref::<DropMetadataBroadcast>()
|
||||
.unwrap();
|
||||
assert!(!status.is_done());
|
||||
assert!(!ddl_context
|
||||
.table_metadata_manager
|
||||
.schema_manager()
|
||||
.exists(SchemaNameKey::new("foo", "bar"))
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
let mut state = DropMetadataBroadcast;
|
||||
let (state, status) = state.next(&ddl_context, &mut ctx).await.unwrap();
|
||||
state.as_any().downcast_ref::<DropDatabaseEnd>().unwrap();
|
||||
assert!(status.is_done());
|
||||
|
||||
// Schema not exists
|
||||
let mut state = DropDatabaseRemoveMetadata;
|
||||
let mut ctx = DropDatabaseContext {
|
||||
@@ -93,7 +150,10 @@ mod tests {
|
||||
tables: None,
|
||||
};
|
||||
let (state, status) = state.next(&ddl_context, &mut ctx).await.unwrap();
|
||||
state.as_any().downcast_ref::<DropDatabaseEnd>().unwrap();
|
||||
assert!(status.is_done());
|
||||
state
|
||||
.as_any()
|
||||
.downcast_ref::<DropMetadataBroadcast>()
|
||||
.unwrap();
|
||||
assert!(!status.is_done());
|
||||
}
|
||||
}
|
||||
|
||||
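The drop-database metadata path above is now split into two states: `DropDatabaseRemoveMetadata` deletes the schema key and hands off to `DropMetadataBroadcast`, which invalidates frontend schema caches before the procedure finishes. A compressed sketch of that transition, with the contexts and error handling omitted:

```rust
// Compressed state sketch; the real states carry DdlContext/DropDatabaseContext
// and return procedure Status values instead of a bare `done` flag.
#[derive(Debug, PartialEq)]
enum DropDatabaseStep {
    RemoveMetadata,
    BroadcastInvalidation,
    End,
}

fn next(step: DropDatabaseStep) -> (DropDatabaseStep, bool /* done */) {
    match step {
        // Deletes the schema name key, then keeps executing.
        DropDatabaseStep::RemoveMetadata => (DropDatabaseStep::BroadcastInvalidation, false),
        // Broadcasts CacheIdent::SchemaName to invalidate frontend caches, then finishes.
        DropDatabaseStep::BroadcastInvalidation => (DropDatabaseStep::End, true),
        DropDatabaseStep::End => (DropDatabaseStep::End, true),
    }
}

fn main() {
    let (step, done) = next(DropDatabaseStep::RemoveMetadata);
    assert_eq!(step, DropDatabaseStep::BroadcastInvalidation);
    assert!(!done);
}
```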
@@ -12,27 +12,28 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod executor;
|
||||
pub(crate) mod executor;
|
||||
mod metadata;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_procedure::error::{FromJsonSnafu, ToJsonSnafu};
|
||||
use common_procedure::{
|
||||
Context as ProcedureContext, LockKey, Procedure, Result as ProcedureResult, Status,
|
||||
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure,
|
||||
Result as ProcedureResult, Status,
|
||||
};
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::tracing::warn;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use strum::AsRefStr;
|
||||
use table::metadata::{RawTableInfo, TableId};
|
||||
use table::metadata::TableId;
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use self::executor::DropTableExecutor;
|
||||
use crate::ddl::utils::handle_retry_error;
|
||||
use crate::ddl::DdlContext;
|
||||
use crate::error::{self, Result};
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::key::DeserializedValueWithBytes;
|
||||
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
|
||||
use crate::metrics;
|
||||
use crate::region_keeper::OperatingRegionGuard;
|
||||
@@ -45,51 +46,51 @@ pub struct DropTableProcedure {
|
||||
/// The serializable data.
|
||||
pub data: DropTableData,
|
||||
/// The guards of opening regions.
|
||||
pub dropping_regions: Vec<OperatingRegionGuard>,
|
||||
pub(crate) dropping_regions: Vec<OperatingRegionGuard>,
|
||||
/// The drop table executor.
|
||||
executor: DropTableExecutor,
|
||||
}
|
||||
|
||||
impl DropTableProcedure {
|
||||
pub const TYPE_NAME: &'static str = "metasrv-procedure::DropTable";
|
||||
|
||||
pub fn new(
|
||||
cluster_id: u64,
|
||||
task: DropTableTask,
|
||||
table_route_value: DeserializedValueWithBytes<TableRouteValue>,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
context: DdlContext,
|
||||
) -> Self {
|
||||
pub fn new(cluster_id: u64, task: DropTableTask, context: DdlContext) -> Self {
|
||||
let data = DropTableData::new(cluster_id, task);
|
||||
let executor = data.build_executor();
|
||||
Self {
|
||||
context,
|
||||
data: DropTableData::new(cluster_id, task, table_route_value, table_info_value),
|
||||
data,
|
||||
dropping_regions: vec![],
|
||||
executor,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_json(json: &str, context: DdlContext) -> ProcedureResult<Self> {
|
||||
let data = serde_json::from_str(json).context(FromJsonSnafu)?;
|
||||
let data: DropTableData = serde_json::from_str(json).context(FromJsonSnafu)?;
|
||||
let executor = data.build_executor();
|
||||
Ok(Self {
|
||||
context,
|
||||
data,
|
||||
dropping_regions: vec![],
|
||||
executor,
|
||||
})
|
||||
}
|
||||
|
||||
async fn on_prepare<'a>(&mut self, executor: &DropTableExecutor) -> Result<Status> {
|
||||
if executor.on_prepare(&self.context).await?.stop() {
|
||||
pub(crate) async fn on_prepare<'a>(&mut self) -> Result<Status> {
|
||||
if self.executor.on_prepare(&self.context).await?.stop() {
|
||||
return Ok(Status::done());
|
||||
}
|
||||
self.data.state = DropTableState::RemoveMetadata;
|
||||
self.fill_table_metadata().await?;
|
||||
self.data.state = DropTableState::DeleteMetadata;
|
||||
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
/// Registers the dropping regions if they don't exist yet.
|
||||
fn register_dropping_regions(&mut self) -> Result<()> {
|
||||
let region_routes = self.data.region_routes()?;
|
||||
let dropping_regions = operating_leader_regions(&self.data.physical_region_routes);
|
||||
|
||||
let dropping_regions = operating_leader_regions(region_routes);
|
||||
|
||||
if self.dropping_regions.len() == dropping_regions.len() {
|
||||
if !self.dropping_regions.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -112,7 +113,7 @@ impl DropTableProcedure {
|
||||
}
|
||||
|
||||
/// Removes the table metadata.
|
||||
async fn on_remove_metadata(&mut self, executor: &DropTableExecutor) -> Result<Status> {
|
||||
pub(crate) async fn on_delete_metadata(&mut self) -> Result<Status> {
|
||||
self.register_dropping_regions()?;
|
||||
// NOTES: If the meta server crashes after the `RemoveMetadata` step,
// the corresponding regions of this table on the Datanode will be closed automatically.
|
||||
@@ -120,8 +121,15 @@ impl DropTableProcedure {
|
||||
|
||||
// TODO(weny): Consider introducing a RegionStatus to indicate that the region is being dropped.
|
||||
let table_id = self.data.table_id();
|
||||
executor
|
||||
.on_remove_metadata(&self.context, self.data.region_routes()?)
|
||||
let table_route_value = &TableRouteValue::new(
|
||||
self.data.task.table_id,
|
||||
// Safety: checked
|
||||
self.data.physical_table_id.unwrap(),
|
||||
self.data.physical_region_routes.clone(),
|
||||
);
|
||||
// Deletes table metadata logically.
|
||||
self.executor
|
||||
.on_delete_metadata(&self.context, table_route_value)
|
||||
.await?;
|
||||
info!("Deleted table metadata for table {table_id}");
|
||||
self.data.state = DropTableState::InvalidateTableCache;
|
||||
@@ -129,17 +137,34 @@ impl DropTableProcedure {
|
||||
}
|
||||
|
||||
/// Broadcasts invalidate table cache instruction.
|
||||
async fn on_broadcast(&mut self, executor: &DropTableExecutor) -> Result<Status> {
|
||||
executor.invalidate_table_cache(&self.context).await?;
|
||||
async fn on_broadcast(&mut self) -> Result<Status> {
|
||||
self.executor.invalidate_table_cache(&self.context).await?;
|
||||
self.data.state = DropTableState::DatanodeDropRegions;
|
||||
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
pub async fn on_datanode_drop_regions(&self, executor: &DropTableExecutor) -> Result<Status> {
|
||||
executor
|
||||
.on_drop_regions(&self.context, self.data.region_routes()?)
|
||||
pub async fn on_datanode_drop_regions(&mut self) -> Result<Status> {
|
||||
self.executor
|
||||
.on_drop_regions(&self.context, &self.data.physical_region_routes)
|
||||
.await?;
|
||||
self.data.state = DropTableState::DeleteTombstone;
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
/// Deletes metadata tombstone.
|
||||
async fn on_delete_metadata_tombstone(&mut self) -> Result<Status> {
|
||||
let table_route_value = &TableRouteValue::new(
|
||||
self.data.task.table_id,
|
||||
// Safety: checked
|
||||
self.data.physical_table_id.unwrap(),
|
||||
self.data.physical_region_routes.clone(),
|
||||
);
|
||||
self.executor
|
||||
.on_delete_metadata_tombstone(&self.context, table_route_value)
|
||||
.await?;
|
||||
|
||||
self.dropping_regions.clear();
|
||||
Ok(Status::done())
|
||||
}
|
||||
}
|
||||
@@ -151,21 +176,17 @@ impl Procedure for DropTableProcedure {
|
||||
}
|
||||
|
||||
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
|
||||
let executor = DropTableExecutor::new(
|
||||
self.data.task.table_name(),
|
||||
self.data.table_id(),
|
||||
self.data.task.drop_if_exists,
|
||||
);
|
||||
let state = &self.data.state;
|
||||
let _timer = metrics::METRIC_META_PROCEDURE_DROP_TABLE
|
||||
.with_label_values(&[state.as_ref()])
|
||||
.start_timer();
|
||||
|
||||
match self.data.state {
|
||||
DropTableState::Prepare => self.on_prepare(&executor).await,
|
||||
DropTableState::RemoveMetadata => self.on_remove_metadata(&executor).await,
|
||||
DropTableState::InvalidateTableCache => self.on_broadcast(&executor).await,
|
||||
DropTableState::DatanodeDropRegions => self.on_datanode_drop_regions(&executor).await,
|
||||
DropTableState::Prepare => self.on_prepare().await,
|
||||
DropTableState::DeleteMetadata => self.on_delete_metadata().await,
|
||||
DropTableState::InvalidateTableCache => self.on_broadcast().await,
|
||||
DropTableState::DatanodeDropRegions => self.on_datanode_drop_regions().await,
|
||||
DropTableState::DeleteTombstone => self.on_delete_metadata_tombstone().await,
|
||||
}
|
||||
.map_err(handle_retry_error)
|
||||
}
|
||||
@@ -185,31 +206,47 @@ impl Procedure for DropTableProcedure {
|
||||
|
||||
LockKey::new(lock_key)
|
||||
}
|
||||
|
||||
fn rollback_supported(&self) -> bool {
|
||||
!matches!(self.data.state, DropTableState::Prepare)
|
||||
}
|
||||
|
||||
async fn rollback(&mut self, _: &ProcedureContext) -> ProcedureResult<()> {
|
||||
warn!(
|
||||
"Rolling back the drop table procedure, table: {}",
|
||||
self.data.table_id()
|
||||
);
|
||||
|
||||
let table_route_value = &TableRouteValue::new(
|
||||
self.data.task.table_id,
|
||||
// Safety: checked
|
||||
self.data.physical_table_id.unwrap(),
|
||||
self.data.physical_region_routes.clone(),
|
||||
);
|
||||
self.executor
|
||||
.on_restore_metadata(&self.context, table_route_value)
|
||||
.await
|
||||
.map_err(ProcedureError::external)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
/// TODO(weny): simplify the table data.
|
||||
pub struct DropTableData {
|
||||
pub state: DropTableState,
|
||||
pub cluster_id: u64,
|
||||
pub task: DropTableTask,
|
||||
pub table_route_value: DeserializedValueWithBytes<TableRouteValue>,
|
||||
pub table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
pub physical_region_routes: Vec<RegionRoute>,
|
||||
pub physical_table_id: Option<TableId>,
|
||||
}
|
||||
|
||||
impl DropTableData {
|
||||
pub fn new(
|
||||
cluster_id: u64,
|
||||
task: DropTableTask,
|
||||
table_route_value: DeserializedValueWithBytes<TableRouteValue>,
|
||||
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
|
||||
) -> Self {
|
||||
pub fn new(cluster_id: u64, task: DropTableTask) -> Self {
|
||||
Self {
|
||||
state: DropTableState::Prepare,
|
||||
cluster_id,
|
||||
task,
|
||||
table_info_value,
|
||||
table_route_value,
|
||||
physical_region_routes: vec![],
|
||||
physical_table_id: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -217,27 +254,30 @@ impl DropTableData {
|
||||
self.task.table_ref()
|
||||
}
|
||||
|
||||
fn region_routes(&self) -> Result<&Vec<RegionRoute>> {
|
||||
self.table_route_value.region_routes()
|
||||
}
|
||||
|
||||
fn table_info(&self) -> &RawTableInfo {
|
||||
&self.table_info_value.table_info
|
||||
}
|
||||
|
||||
fn table_id(&self) -> TableId {
|
||||
self.table_info().ident.table_id
|
||||
self.task.table_id
|
||||
}
|
||||
|
||||
fn build_executor(&self) -> DropTableExecutor {
|
||||
DropTableExecutor::new(
|
||||
self.task.table_name(),
|
||||
self.task.table_id,
|
||||
self.task.drop_if_exists,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, AsRefStr)]
/// The state of drop table.
#[derive(Debug, Serialize, Deserialize, AsRefStr, PartialEq)]
pub enum DropTableState {
    /// Prepares to drop the table
    Prepare,
    /// Removes metadata
    RemoveMetadata,
    /// Deletes metadata logically
    DeleteMetadata,
    /// Invalidates Table Cache
    InvalidateTableCache,
    /// Drops regions on Datanode
    DatanodeDropRegions,
    /// Deletes metadata tombstone permanently
    DeleteTombstone,
}

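A minimal sketch of the happy-path order in which the reworked `execute` loop above advances these states (stand-in enum, illustrative only, not part of the patch):

// Stand-in enum mirroring DropTableState; next() mirrors the match arms in execute().
#[derive(Debug, PartialEq)]
enum SketchState {
    Prepare,
    DeleteMetadata,
    InvalidateTableCache,
    DatanodeDropRegions,
    DeleteTombstone,
}

fn next(state: SketchState) -> Option<SketchState> {
    use SketchState::*;
    match state {
        Prepare => Some(DeleteMetadata),                    // on_prepare
        DeleteMetadata => Some(InvalidateTableCache),       // on_delete_metadata
        InvalidateTableCache => Some(DatanodeDropRegions),  // on_broadcast
        DatanodeDropRegions => Some(DeleteTombstone),       // on_datanode_drop_regions
        DeleteTombstone => None,                            // on_delete_metadata_tombstone -> Status::done()
    }
}
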
@@ -30,6 +30,7 @@ use crate::ddl::DdlContext;
|
||||
use crate::error::{self, Result};
|
||||
use crate::instruction::CacheIdent;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
|
||||
use crate::table_name::TableName;
|
||||
|
||||
@@ -99,14 +100,47 @@ impl DropTableExecutor {
|
||||
Ok(Control::Continue(()))
|
||||
}
|
||||
|
||||
/// Removes the table metadata.
|
||||
pub async fn on_remove_metadata(
|
||||
/// Deletes the table metadata **logically**.
|
||||
pub async fn on_delete_metadata(
|
||||
&self,
|
||||
ctx: &DdlContext,
|
||||
region_routes: &[RegionRoute],
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<()> {
|
||||
ctx.table_metadata_manager
|
||||
.delete_table_metadata(self.table_id, &self.table, region_routes)
|
||||
.delete_table_metadata(self.table_id, &self.table, table_route_value)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Deletes the table metadata tombstone **permanently**.
|
||||
pub async fn on_delete_metadata_tombstone(
|
||||
&self,
|
||||
ctx: &DdlContext,
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<()> {
|
||||
ctx.table_metadata_manager
|
||||
.delete_table_metadata_tombstone(self.table_id, &self.table, table_route_value)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Deletes the table metadata **permanently**.
|
||||
pub async fn on_destroy_metadata(
|
||||
&self,
|
||||
ctx: &DdlContext,
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<()> {
|
||||
ctx.table_metadata_manager
|
||||
.destroy_table_metadata(self.table_id, &self.table, table_route_value)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Restores the table metadata.
|
||||
pub async fn on_restore_metadata(
|
||||
&self,
|
||||
ctx: &DdlContext,
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<()> {
|
||||
ctx.table_metadata_manager
|
||||
.restore_table_metadata(self.table_id, &self.table, table_route_value)
|
||||
.await
|
||||
}
|
||||
|
||||
|
||||
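The executor methods above split the old single-step removal into a two-phase delete: a logical delete that can still be undone, followed by either a permanent purge (the new DeleteTombstone state) or a restore on rollback. A self-contained sketch of that idea with stand-in types (illustrative only, not the patch's API):

// Stand-in for the metadata lifecycle driven by the executor methods above.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Metadata {
    Live,       // before the procedure runs
    Tombstoned, // after on_delete_metadata (logical delete)
    Purged,     // after on_delete_metadata_tombstone (permanent)
}

fn delete_logically(m: Metadata) -> Metadata {
    if m == Metadata::Live { Metadata::Tombstoned } else { m }
}

fn purge_tombstone(m: Metadata) -> Metadata {
    if m == Metadata::Tombstoned { Metadata::Purged } else { m }
}

fn restore(m: Metadata) -> Metadata {
    // Mirrors on_restore_metadata, used by Procedure::rollback.
    if m == Metadata::Tombstoned { Metadata::Live } else { m }
}
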
src/common/meta/src/ddl/drop_table/metadata.rs (new file, 34 lines)
@@ -0,0 +1,34 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::ddl::drop_table::DropTableProcedure;
use crate::error::Result;

impl DropTableProcedure {
    /// Fetches the table info and physical table route.
    pub(crate) async fn fill_table_metadata(&mut self) -> Result<()> {
        let task = &self.data.task;
        let (physical_table_id, physical_table_route_value) = self
            .context
            .table_metadata_manager
            .table_route_manager()
            .get_physical_table_route(task.table_id)
            .await?;

        self.data.physical_region_routes = physical_table_route_value.region_routes;
        self.data.physical_table_id = Some(physical_table_id);

        Ok(())
    }
}

@@ -52,5 +52,9 @@ pub(crate) fn build_new_physical_table_info(
|
||||
columns.push(col.column_schema.clone());
|
||||
}
|
||||
|
||||
if let Some(time_index) = *time_index {
|
||||
raw_table_info.meta.schema.column_schemas[time_index].set_time_index();
|
||||
}
|
||||
|
||||
raw_table_info
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
pub mod alter_table;
|
||||
pub mod columns;
|
||||
pub mod create_table;
|
||||
pub mod datanode_handler;
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
@@ -46,7 +47,7 @@ pub async fn create_physical_table_metadata(
|
||||
}
|
||||
|
||||
pub async fn create_physical_table(
|
||||
ddl_context: DdlContext,
|
||||
ddl_context: &DdlContext,
|
||||
cluster_id: ClusterId,
|
||||
name: &str,
|
||||
) -> TableId {
|
||||
@@ -66,7 +67,7 @@ pub async fn create_physical_table(
|
||||
.unwrap();
|
||||
create_physical_table_task.set_table_id(table_id);
|
||||
create_physical_table_metadata(
|
||||
&ddl_context,
|
||||
ddl_context,
|
||||
create_physical_table_task.table_info.clone(),
|
||||
TableRouteValue::Physical(table_route),
|
||||
)
|
||||
@@ -80,7 +81,7 @@ pub async fn create_logical_table(
|
||||
cluster_id: ClusterId,
|
||||
physical_table_id: TableId,
|
||||
table_name: &str,
|
||||
) {
|
||||
) -> TableId {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
let tasks = vec![test_create_logical_table_task(table_name)];
|
||||
@@ -90,6 +91,14 @@ pub async fn create_logical_table(
|
||||
assert_matches!(status, Status::Executing { persist: true });
|
||||
let status = procedure.on_create_metadata().await.unwrap();
|
||||
assert_matches!(status, Status::Done { .. });
|
||||
|
||||
let Status::Done {
|
||||
output: Some(output),
|
||||
} = status
|
||||
else {
|
||||
panic!("Unexpected status: {:?}", status);
|
||||
};
|
||||
output.downcast_ref::<Vec<u32>>().unwrap()[0]
|
||||
}
|
||||
|
||||
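The updated `create_logical_table` helper above now returns the allocated table id by downcasting the type-erased procedure output; a minimal stand-alone illustration of that downcast (using `&dyn Any` as a stand-in for the procedure's output type):

use std::any::Any;

// Mirrors `output.downcast_ref::<Vec<u32>>().unwrap()[0]` above, but without panicking.
fn first_table_id(output: &dyn Any) -> Option<u32> {
    output.downcast_ref::<Vec<u32>>()?.first().copied()
}
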
pub fn test_create_logical_table_task(name: &str) -> CreateTableTask {
|
||||
|
||||
@@ -28,7 +28,7 @@ pub struct TestAlterTableExpr {
|
||||
table_name: String,
|
||||
#[builder(setter(into))]
|
||||
add_columns: Vec<ColumnDef>,
|
||||
#[builder(setter(into))]
|
||||
#[builder(setter(into, strip_option))]
|
||||
new_table_name: Option<String>,
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,8 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::column_def::try_as_column_schema;
|
||||
use api::v1::{ColumnDef, CreateTableExpr, SemanticType};
|
||||
use api::v1::meta::Partition;
|
||||
use api::v1::{ColumnDataType, ColumnDef, CreateTableExpr, SemanticType};
|
||||
use chrono::DateTime;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO2_ENGINE};
|
||||
use datatypes::schema::RawSchema;
|
||||
@@ -24,6 +25,9 @@ use store_api::storage::TableId;
|
||||
use table::metadata::{RawTableInfo, RawTableMeta, TableIdent, TableType};
|
||||
use table::requests::TableOptions;
|
||||
|
||||
use crate::ddl::test_util::columns::TestColumnDefBuilder;
|
||||
use crate::rpc::ddl::CreateTableTask;
|
||||
|
||||
#[derive(Default, Builder)]
|
||||
#[builder(default)]
|
||||
pub struct TestCreateTableExpr {
|
||||
@@ -43,6 +47,7 @@ pub struct TestCreateTableExpr {
|
||||
primary_keys: Vec<String>,
|
||||
create_if_not_exists: bool,
|
||||
table_options: HashMap<String, String>,
|
||||
#[builder(setter(into, strip_option))]
|
||||
table_id: Option<TableId>,
|
||||
#[builder(setter(into), default = "MITO2_ENGINE.to_string()")]
|
||||
engine: String,
|
||||
@@ -129,3 +134,47 @@ pub fn build_raw_table_info_from_expr(expr: &CreateTableExpr) -> RawTableInfo {
|
||||
table_type: TableType::Base,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn test_create_table_task(name: &str, table_id: TableId) -> CreateTableTask {
|
||||
let create_table = TestCreateTableExprBuilder::default()
|
||||
.column_defs([
|
||||
TestColumnDefBuilder::default()
|
||||
.name("ts")
|
||||
.data_type(ColumnDataType::TimestampMillisecond)
|
||||
.semantic_type(SemanticType::Timestamp)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into(),
|
||||
TestColumnDefBuilder::default()
|
||||
.name("host")
|
||||
.data_type(ColumnDataType::String)
|
||||
.semantic_type(SemanticType::Tag)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into(),
|
||||
TestColumnDefBuilder::default()
|
||||
.name("cpu")
|
||||
.data_type(ColumnDataType::Float64)
|
||||
.semantic_type(SemanticType::Field)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into(),
|
||||
])
|
||||
.table_id(table_id)
|
||||
.time_index("ts")
|
||||
.primary_keys(["host".into()])
|
||||
.table_name(name)
|
||||
.build()
|
||||
.unwrap()
|
||||
.into();
|
||||
let table_info = build_raw_table_info_from_expr(&create_table);
|
||||
CreateTableTask {
|
||||
create_table,
|
||||
// Single region
|
||||
partitions: vec![Partition {
|
||||
column_list: vec![],
|
||||
value_list: vec![],
|
||||
}],
|
||||
table_info,
|
||||
}
|
||||
}
|
||||
|
||||
src/common/meta/src/ddl/test_util/datanode_handler.rs (new file, 169 lines)
@@ -0,0 +1,169 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::region::RegionResponse;
|
||||
use api::v1::region::{QueryRequest, RegionRequest};
|
||||
use common_error::ext::{BoxedError, ErrorExt, StackError};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use common_telemetry::debug;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::peer::Peer;
|
||||
use crate::test_util::MockDatanodeHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for () {
|
||||
async fn handle(&self, _peer: &Peer, _request: RegionRequest) -> Result<RegionResponse> {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
&self,
|
||||
_peer: &Peer,
|
||||
_request: QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DatanodeWatcher(pub mpsc::Sender<(Peer, RegionRequest)>);
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for DatanodeWatcher {
|
||||
async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<RegionResponse> {
|
||||
debug!("Returning Ok(0) for request: {request:?}, peer: {peer:?}");
|
||||
self.0.send((peer.clone(), request)).await.unwrap();
|
||||
Ok(RegionResponse::new(0))
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
&self,
|
||||
_peer: &Peer,
|
||||
_request: QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RetryErrorDatanodeHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for RetryErrorDatanodeHandler {
|
||||
async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<RegionResponse> {
|
||||
debug!("Returning retry later for request: {request:?}, peer: {peer:?}");
|
||||
Err(Error::RetryLater {
|
||||
source: BoxedError::new(
|
||||
error::UnexpectedSnafu {
|
||||
err_msg: "retry later",
|
||||
}
|
||||
.build(),
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
&self,
|
||||
_peer: &Peer,
|
||||
_request: QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct UnexpectedErrorDatanodeHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for UnexpectedErrorDatanodeHandler {
|
||||
async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<RegionResponse> {
|
||||
debug!("Returning mock error for request: {request:?}, peer: {peer:?}");
|
||||
error::UnexpectedSnafu {
|
||||
err_msg: "mock error",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
&self,
|
||||
_peer: &Peer,
|
||||
_request: QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RequestOutdatedErrorDatanodeHandler;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(display("A mock RequestOutdated error"))]
|
||||
struct MockRequestOutdatedError;
|
||||
|
||||
impl StackError for MockRequestOutdatedError {
|
||||
fn debug_fmt(&self, _: usize, _: &mut Vec<String>) {}
|
||||
|
||||
fn next(&self) -> Option<&dyn StackError> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorExt for MockRequestOutdatedError {
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn status_code(&self) -> StatusCode {
|
||||
StatusCode::RequestOutdated
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for RequestOutdatedErrorDatanodeHandler {
|
||||
async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<RegionResponse> {
|
||||
debug!("Returning mock error for request: {request:?}, peer: {peer:?}");
|
||||
Err(BoxedError::new(MockRequestOutdatedError)).context(error::ExternalSnafu)
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
&self,
|
||||
_peer: &Peer,
|
||||
_request: QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct NaiveDatanodeHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for NaiveDatanodeHandler {
|
||||
async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<RegionResponse> {
|
||||
debug!("Returning Ok(0) for request: {request:?}, peer: {peer:?}");
|
||||
Ok(RegionResponse::new(0))
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
&self,
|
||||
_peer: &Peer,
|
||||
_request: QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
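The new `datanode_handler.rs` above collects mock handlers that script how a fake datanode answers region requests (always succeed, ask for a retry, fail, or report an outdated request). A stripped-down sketch of the same pattern with stand-in types (illustrative only, not the test utility's real trait):

// Stand-in trait mirroring MockDatanodeHandler: tests pick a handler to script datanode replies.
trait Handler {
    fn handle(&self, request: &str) -> Result<u64, String>;
}

struct Naive;      // like NaiveDatanodeHandler: always Ok(0)
struct RetryLater; // like RetryErrorDatanodeHandler: always asks the caller to retry

impl Handler for Naive {
    fn handle(&self, _request: &str) -> Result<u64, String> {
        Ok(0)
    }
}

impl Handler for RetryLater {
    fn handle(&self, _request: &str) -> Result<u64, String> {
        Err("retry later".to_string())
    }
}
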
@@ -13,6 +13,8 @@
|
||||
// limitations under the License.
|
||||
|
||||
mod alter_logical_tables;
|
||||
mod alter_table;
|
||||
mod create_logical_tables;
|
||||
mod create_table;
|
||||
mod drop_database;
|
||||
mod drop_table;
|
||||
|
||||
@@ -23,8 +23,8 @@ use common_procedure_test::MockContextProvider;
|
||||
use crate::ddl::alter_logical_tables::AlterLogicalTablesProcedure;
|
||||
use crate::ddl::test_util::alter_table::TestAlterTableExprBuilder;
|
||||
use crate::ddl::test_util::columns::TestColumnDefBuilder;
|
||||
use crate::ddl::test_util::datanode_handler::NaiveDatanodeHandler;
|
||||
use crate::ddl::test_util::{create_logical_table, create_physical_table};
|
||||
use crate::ddl::tests::create_logical_tables::NaiveDatanodeHandler;
|
||||
use crate::error::Error::{AlterLogicalTablesInvalidArguments, TableNotFound};
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::rpc::ddl::AlterTableTask;
|
||||
@@ -128,9 +128,9 @@ async fn test_on_prepare_different_physical_table() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
|
||||
let phy1_id = create_physical_table(ddl_context.clone(), cluster_id, "phy1").await;
|
||||
let phy1_id = create_physical_table(&ddl_context, cluster_id, "phy1").await;
|
||||
create_logical_table(ddl_context.clone(), cluster_id, phy1_id, "table1").await;
|
||||
let phy2_id = create_physical_table(ddl_context.clone(), cluster_id, "phy2").await;
|
||||
let phy2_id = create_physical_table(&ddl_context, cluster_id, "phy2").await;
|
||||
create_logical_table(ddl_context.clone(), cluster_id, phy2_id, "table2").await;
|
||||
|
||||
let tasks = vec![
|
||||
@@ -150,7 +150,7 @@ async fn test_on_prepare_logical_table_not_exists() {
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
|
||||
// Creates physical table
|
||||
let phy_id = create_physical_table(ddl_context.clone(), cluster_id, "phy").await;
|
||||
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
|
||||
// Creates 3 logical tables
|
||||
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
|
||||
|
||||
@@ -172,7 +172,7 @@ async fn test_on_prepare() {
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
|
||||
// Creates physical table
|
||||
let phy_id = create_physical_table(ddl_context.clone(), cluster_id, "phy").await;
|
||||
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
|
||||
// Creates 3 logical tables
|
||||
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
|
||||
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
|
||||
@@ -196,7 +196,7 @@ async fn test_on_update_metadata() {
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
|
||||
// Creates physical table
|
||||
let phy_id = create_physical_table(ddl_context.clone(), cluster_id, "phy").await;
|
||||
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
|
||||
// Creates 3 logical tables
|
||||
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
|
||||
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
|
||||
@@ -233,7 +233,7 @@ async fn test_on_part_duplicate_alter_request() {
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
|
||||
// Creates physical table
|
||||
let phy_id = create_physical_table(ddl_context.clone(), cluster_id, "phy").await;
|
||||
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
|
||||
// Creates 3 logical tables
|
||||
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
|
||||
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
|
||||
|
||||
src/common/meta/src/ddl/tests/alter_table.rs (new file, 345 lines)
@@ -0,0 +1,345 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::alter_expr::Kind;
|
||||
use api::v1::region::{region_request, RegionRequest};
|
||||
use api::v1::{
|
||||
AddColumn, AddColumns, AlterExpr, ColumnDataType, ColumnDef as PbColumnDef, DropColumn,
|
||||
DropColumns, SemanticType,
|
||||
};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::sync::mpsc::{self};
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::ddl::test_util::alter_table::TestAlterTableExprBuilder;
|
||||
use crate::ddl::test_util::create_table::test_create_table_task;
|
||||
use crate::ddl::test_util::datanode_handler::{
|
||||
DatanodeWatcher, RequestOutdatedErrorDatanodeHandler,
|
||||
};
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::ddl::AlterTableTask;
|
||||
use crate::rpc::router::{Region, RegionRoute};
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeManager};
|
||||
|
||||
fn test_rename_alter_table_task(table_name: &str, new_table_name: &str) -> AlterTableTask {
|
||||
let builder = TestAlterTableExprBuilder::default()
|
||||
.table_name(table_name)
|
||||
.new_table_name(new_table_name)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
AlterTableTask {
|
||||
alter_table: builder.into(),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_prepare_table_exists_err() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let task = test_create_table_task("foo", 1024);
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let task = test_rename_alter_table_task("non-exists", "foo");
|
||||
let mut procedure = AlterTableProcedure::new(cluster_id, 1024, task, ddl_context).unwrap();
|
||||
let err = procedure.on_prepare().await.unwrap_err();
|
||||
assert_matches!(err.status_code(), StatusCode::TableAlreadyExists);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_prepare_table_not_exists_err() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let task = test_rename_alter_table_task("non-exists", "foo");
|
||||
let mut procedure = AlterTableProcedure::new(cluster_id, 1024, task, ddl_context).unwrap();
|
||||
let err = procedure.on_prepare().await.unwrap_err();
|
||||
assert_matches!(err.status_code(), StatusCode::TableNotFound);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_submit_alter_request() {
|
||||
let (tx, mut rx) = mpsc::channel(8);
|
||||
let datanode_handler = DatanodeWatcher(tx);
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(datanode_handler));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let table_id = 1024;
|
||||
let table_name = "foo";
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 1)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![Peer::empty(5)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 2)),
|
||||
leader_peer: Some(Peer::empty(2)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 3)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let alter_table_task = AlterTableTask {
|
||||
alter_table: AlterExpr {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: table_name.to_string(),
|
||||
kind: Some(Kind::DropColumns(DropColumns {
|
||||
drop_columns: vec![DropColumn {
|
||||
name: "my_field_column".to_string(),
|
||||
}],
|
||||
})),
|
||||
},
|
||||
};
|
||||
let mut procedure =
|
||||
AlterTableProcedure::new(cluster_id, table_id, alter_table_task, ddl_context).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.submit_alter_region_requests().await.unwrap();
|
||||
|
||||
let check = |peer: Peer,
|
||||
request: RegionRequest,
|
||||
expected_peer_id: u64,
|
||||
expected_region_id: RegionId| {
|
||||
assert_eq!(peer.id, expected_peer_id);
|
||||
let Some(region_request::Body::Alter(req)) = request.body else {
|
||||
unreachable!();
|
||||
};
|
||||
assert_eq!(req.region_id, expected_region_id);
|
||||
};
|
||||
|
||||
let mut results = Vec::new();
|
||||
for _ in 0..3 {
|
||||
let result = rx.try_recv().unwrap();
|
||||
results.push(result);
|
||||
}
|
||||
results.sort_unstable_by(|(a, _), (b, _)| a.id.cmp(&b.id));
|
||||
|
||||
let (peer, request) = results.remove(0);
|
||||
check(peer, request, 1, RegionId::new(table_id, 1));
|
||||
let (peer, request) = results.remove(0);
|
||||
check(peer, request, 2, RegionId::new(table_id, 2));
|
||||
let (peer, request) = results.remove(0);
|
||||
check(peer, request, 3, RegionId::new(table_id, 3));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_submit_alter_request_with_outdated_request() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(
|
||||
RequestOutdatedErrorDatanodeHandler,
|
||||
));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let table_id = 1024;
|
||||
let table_name = "foo";
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 1)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![Peer::empty(5)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 2)),
|
||||
leader_peer: Some(Peer::empty(2)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, 3)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![],
|
||||
leader_status: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let alter_table_task = AlterTableTask {
|
||||
alter_table: AlterExpr {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: table_name.to_string(),
|
||||
kind: Some(Kind::DropColumns(DropColumns {
|
||||
drop_columns: vec![DropColumn {
|
||||
name: "my_field_column".to_string(),
|
||||
}],
|
||||
})),
|
||||
},
|
||||
};
|
||||
let mut procedure =
|
||||
AlterTableProcedure::new(cluster_id, table_id, alter_table_task, ddl_context).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.submit_alter_region_requests().await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_update_metadata_rename() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let table_name = "foo";
|
||||
let new_table_name = "bar";
|
||||
let table_id = 1024;
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let task = test_rename_alter_table_task(table_name, new_table_name);
|
||||
let mut procedure =
|
||||
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.on_update_metadata().await.unwrap();
|
||||
|
||||
let old_table_name_exists = ddl_context
|
||||
.table_metadata_manager
|
||||
.table_name_manager()
|
||||
.exists(TableNameKey::new(
|
||||
DEFAULT_CATALOG_NAME,
|
||||
DEFAULT_SCHEMA_NAME,
|
||||
table_name,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!old_table_name_exists);
|
||||
let value = ddl_context
|
||||
.table_metadata_manager
|
||||
.table_name_manager()
|
||||
.get(TableNameKey::new(
|
||||
DEFAULT_CATALOG_NAME,
|
||||
DEFAULT_SCHEMA_NAME,
|
||||
new_table_name,
|
||||
))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(value.table_id(), table_id);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_update_metadata_add_columns() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(datanode_manager);
|
||||
let cluster_id = 1;
|
||||
let table_name = "foo";
|
||||
let table_id = 1024;
|
||||
let task = test_create_table_task(table_name, table_id);
|
||||
// Puts a value to table name key.
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::physical(vec![]),
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let task = AlterTableTask {
|
||||
alter_table: AlterExpr {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: table_name.to_string(),
|
||||
kind: Some(Kind::AddColumns(AddColumns {
|
||||
add_columns: vec![AddColumn {
|
||||
column_def: Some(PbColumnDef {
|
||||
name: "my_tag3".to_string(),
|
||||
data_type: ColumnDataType::String as i32,
|
||||
semantic_type: SemanticType::Tag as i32,
|
||||
is_nullable: true,
|
||||
..Default::default()
|
||||
}),
|
||||
location: None,
|
||||
}],
|
||||
})),
|
||||
},
|
||||
};
|
||||
let mut procedure =
|
||||
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
procedure.on_update_metadata().await.unwrap();
|
||||
|
||||
let table_info = ddl_context
|
||||
.table_metadata_manager
|
||||
.table_info_manager()
|
||||
.get(table_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.into_inner()
|
||||
.table_info;
|
||||
|
||||
assert_eq!(
|
||||
table_info.meta.schema.column_schemas.len() as u32,
|
||||
table_info.meta.next_column_id
|
||||
);
|
||||
}
|
||||
@@ -15,25 +15,21 @@
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::region::{QueryRequest, RegionRequest};
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_procedure::{Context as ProcedureContext, Procedure, ProcedureId, Status};
|
||||
use common_procedure_test::MockContextProvider;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use common_telemetry::debug;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::datanode_manager::HandleResponse;
|
||||
use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure;
|
||||
use crate::ddl::test_util::datanode_handler::NaiveDatanodeHandler;
|
||||
use crate::ddl::test_util::{
|
||||
create_physical_table_metadata, test_create_logical_table_task, test_create_physical_table_task,
|
||||
};
|
||||
use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
|
||||
use crate::error::{Error, Result};
|
||||
use crate::error::Error;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::peer::Peer;
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeHandler, MockDatanodeManager};
|
||||
use crate::test_util::{new_ddl_context, MockDatanodeManager};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_prepare_physical_table_not_found() {
|
||||
@@ -229,25 +225,6 @@ async fn test_on_prepare_part_logical_tables_exist() {
|
||||
assert_matches!(status, Status::Executing { persist: true });
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct NaiveDatanodeHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for NaiveDatanodeHandler {
|
||||
async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
|
||||
debug!("Returning Ok(0) for request: {request:?}, peer: {peer:?}");
|
||||
Ok(HandleResponse::new(0))
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
&self,
|
||||
_peer: &Peer,
|
||||
_request: QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_create_metadata() {
|
||||
let datanode_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.