Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2025-12-22 22:20:02 +00:00)

Compare commits: v0.5.0-nig... feat/merge (411 commits)
Commit list (Author / SHA1 / Date): 411 commits, from 2ed98ff558 through 5cff735e02; only the SHA1 column is populated in the source table.
@@ -12,9 +12,4 @@ rustflags = [
"-Wclippy::print_stdout",
"-Wclippy::print_stderr",
"-Wclippy::implicit_clone",
-
-# It seems clippy has made a false positive decision here when upgrading rust toolchain to
-# nightly-2023-08-07, we do need it to be borrowed mutably.
-# Allow it for now; try disallow it when the toolchain is upgraded in the future.
-"-Aclippy::needless_pass_by_ref_mut",
]
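For context, clippy::needless_pass_by_ref_mut fires on a `&mut` parameter that is never actually mutated through. A minimal Rust sketch of that pattern (a hypothetical illustration, not code from this repository) shows why the `-Aclippy::needless_pass_by_ref_mut` escape hatch existed before this change dropped it:

    // The lint suggests taking `&u64` here because `counter` is only read,
    // even though a caller may deliberately want exclusive access.
    fn report(counter: &mut u64) {
        println!("counter = {counter}");
    }

    fn main() {
        let mut c = 1u64;
        report(&mut c);
    }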
@@ -19,3 +19,5 @@ GT_GCS_BUCKET = GCS bucket
GT_GCS_SCOPE = GCS scope
GT_GCS_CREDENTIAL_PATH = GCS credential path
GT_GCS_ENDPOINT = GCS end point
+# Settings for kafka wal test
+GT_KAFKA_ENDPOINTS = localhost:9092
54 .github/ISSUE_TEMPLATE/bug_report.yml (vendored)

@@ -21,6 +21,7 @@ body:
- Locking issue
- Performance issue
- Unexpected error
- User Experience
- Other
validations:
required: true
@@ -33,21 +34,40 @@ body:
multiple: true
options:
- Standalone mode
- Distributed Cluster
- Storage Engine
- Query Engine
- Table Engine
- Write Protocols
- MetaSrv
- Frontend
- Datanode
- Meta
- Other
validations:
required: true

- type: textarea
-id: what-happened
+id: reproduce
attributes:
-label: What happened?
+label: Minimal reproduce step
description: |
-Tell us what happened and also what you would have expected to
-happen instead.
-placeholder: "Describe the bug"
+Please walk us through and provide steps and details on how
+to reproduce the issue. If possible, provide scripts that we
+can run to trigger the bug.
validations:
required: true

+- type: textarea
+id: expected-manner
+attributes:
+label: What did you expect to see?
+validations:
+required: true

+- type: textarea
+id: actual-manner
+attributes:
+label: What did you see instead?
+validations:
+required: true

@@ -63,6 +83,17 @@ body:
validations:
required: true

+- type: input
+id: greptimedb
+attributes:
+label: What version of GreptimeDB did you use?
+description: |
+Please provide the version of GreptimeDB. For example:
+0.5.1 etc. You can get it by executing command line `greptime --version`.
+placeholder: "0.5.1"
+validations:
+required: true

- type: textarea
id: logs
attributes:
@@ -72,14 +103,3 @@ body:
trace. This will be automatically formatted into code, so no
need for backticks.
render: bash

-- type: textarea
-id: reproduce
-attributes:
-label: How can we reproduce the bug?
-description: |
-Please walk us through and provide steps and details on how
-to reproduce the issue. If possible, provide scripts that we
-can run to trigger the bug.
-validations:
-required: true
@@ -40,9 +40,11 @@ runs:
- name: Upload artifacts
uses: ./.github/actions/upload-artifacts
if: ${{ inputs.build-android-artifacts == 'false' }}
+env:
+PROFILE_TARGET: ${{ inputs.cargo-profile == 'dev' && 'debug' || inputs.cargo-profile }}
with:
artifacts-dir: ${{ inputs.artifacts-dir }}
-target-file: ./target/${{ inputs.cargo-profile }}/greptime
+target-file: ./target/$PROFILE_TARGET/greptime
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}
@@ -53,7 +53,7 @@ runs:
uses: docker/setup-buildx-action@v2

- name: Download amd64 artifacts
-uses: actions/download-artifact@v3
+uses: actions/download-artifact@v4
with:
name: ${{ inputs.amd64-artifact-name }}

@@ -66,7 +66,7 @@ runs:
mv ${{ inputs.amd64-artifact-name }} amd64

- name: Download arm64 artifacts
-uses: actions/download-artifact@v3
+uses: actions/download-artifact@v4
if: ${{ inputs.arm64-artifact-name }}
with:
name: ${{ inputs.arm64-artifact-name }}
@@ -25,7 +25,7 @@ inputs:
runs:
using: composite
steps:
-- uses: arduino/setup-protoc@v1
+- uses: arduino/setup-protoc@v3

- name: Install rust toolchain
uses: dtolnay/rust-toolchain@master
@@ -38,7 +38,7 @@ runs:
uses: Swatinem/rust-cache@v2

- name: Install Python
-uses: actions/setup-python@v4
+uses: actions/setup-python@v5
with:
python-version: '3.10'
@@ -15,7 +15,7 @@ runs:
# |- greptime-darwin-amd64-v0.5.0.sha256sum/greptime-darwin-amd64-v0.5.0.sha256sum
# ...
- name: Download artifacts
-uses: actions/download-artifact@v3
+uses: actions/download-artifact@v4

- name: Create git tag for release
if: ${{ github.event_name != 'push' }} # Meaning this is a scheduled or manual workflow.
@@ -31,10 +31,12 @@ runs:
echo "prerelease=false" >> $GITHUB_ENV
echo "makeLatest=true" >> $GITHUB_ENV
echo "generateReleaseNotes=false" >> $GITHUB_ENV
+echo "omitBody=true" >> $GITHUB_ENV
else
echo "prerelease=true" >> $GITHUB_ENV
echo "makeLatest=false" >> $GITHUB_ENV
echo "generateReleaseNotes=true" >> $GITHUB_ENV
+echo "omitBody=false" >> $GITHUB_ENV
fi

- name: Publish release
@@ -45,6 +47,7 @@ runs:
makeLatest: ${{ env.makeLatest }}
tag: ${{ inputs.version }}
generateReleaseNotes: ${{ env.generateReleaseNotes }}
+omitBody: ${{ env.omitBody }} # omitBody is true when the release is a official release.
allowUpdates: true
artifacts: |
**/greptime-*/*
@@ -73,7 +73,7 @@ runs:
using: composite
steps:
- name: Download artifacts
-uses: actions/download-artifact@v3
+uses: actions/download-artifact@v4
with:
path: ${{ inputs.artifacts-dir }}
11 .github/actions/upload-artifacts/action.yml (vendored)

@@ -6,7 +6,7 @@ inputs:
required: true
target-file:
description: The path of the target artifact
-required: true
+required: false
version:
description: Version of the artifact
required: true
@@ -18,11 +18,12 @@ runs:
using: composite
steps:
- name: Create artifacts directory
+if: ${{ inputs.target-file != '' }}
working-directory: ${{ inputs.working-dir }}
shell: bash
run: |
mkdir -p ${{ inputs.artifacts-dir }} && \
-mv ${{ inputs.target-file }} ${{ inputs.artifacts-dir }}
+cp ${{ inputs.target-file }} ${{ inputs.artifacts-dir }}

# The compressed artifacts will use the following layout:
# greptime-linux-amd64-pyo3-v0.3.0sha256sum
@@ -49,15 +50,15 @@ runs:
run: Get-FileHash ${{ inputs.artifacts-dir }}.tar.gz -Algorithm SHA256 | select -ExpandProperty Hash > ${{ inputs.artifacts-dir }}.sha256sum

# Note: The artifacts will be double zip compressed(related issue: https://github.com/actions/upload-artifact/issues/39).
-# However, when we use 'actions/download-artifact@v3' to download the artifacts, it will be automatically unzipped.
+# However, when we use 'actions/download-artifact' to download the artifacts, it will be automatically unzipped.
- name: Upload artifacts
-uses: actions/upload-artifact@v3
+uses: actions/upload-artifact@v4
with:
name: ${{ inputs.artifacts-dir }}
path: ${{ inputs.working-dir }}/${{ inputs.artifacts-dir }}.tar.gz

- name: Upload checksum
-uses: actions/upload-artifact@v3
+uses: actions/upload-artifact@v4
with:
name: ${{ inputs.artifacts-dir }}.sha256sum
path: ${{ inputs.working-dir }}/${{ inputs.artifacts-dir }}.sha256sum
4 .github/doc-label-config.yml (vendored, new file)

@@ -0,0 +1,4 @@
+Doc not needed:
+- '- \[x\] This PR does not require documentation updates.'
+Doc update required:
+- '- \[ \] This PR does not require documentation updates.'
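The two patterns above key off the documentation checkbox in the pull-request template (shown in the next diff). A minimal sketch of how they behave, written against the regex crate and assuming its standard `Regex::new`/`is_match` API (illustration only, not part of this change):

    use regex::Regex;

    fn main() {
        // Ticked checkbox -> the "Doc not needed" label applies.
        let doc_not_needed =
            Regex::new(r"- \[x\] This PR does not require documentation updates\.").unwrap();
        // Unticked checkbox -> the "Doc update required" label applies.
        let doc_update_required =
            Regex::new(r"- \[ \] This PR does not require documentation updates\.").unwrap();

        assert!(doc_not_needed.is_match("- [x] This PR does not require documentation updates."));
        assert!(doc_update_required.is_match("- [ ] This PR does not require documentation updates."));
    }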
1 .github/pull_request_template.md (vendored)

@@ -15,5 +15,6 @@ Please explain IN DETAIL what the changes are in this PR and why they are needed

- [ ] I have written the necessary rustdoc comments.
- [ ] I have added the necessary unit tests and integration tests.
+- [x] This PR does not require documentation updates.

## Refer to a related PR or issue link (optional)
7 .github/scripts/deploy-greptimedb.sh (vendored)

@@ -107,12 +107,9 @@ function deploy_greptimedb_cluster_with_s3_storage() {
--set storage.s3.bucket="$AWS_CI_TEST_BUCKET" \
--set storage.s3.region="$AWS_REGION" \
--set storage.s3.root="$DATA_ROOT" \
--set storage.s3.secretName=s3-credentials \
--set storage.credentials.secretName=s3-credentials \
--set storage.credentials.secretCreation.enabled=true \
--set storage.credentials.secretCreation.enableEncryption=false \
--set storage.credentials.secretCreation.data.access-key-id="$AWS_ACCESS_KEY_ID" \
--set storage.credentials.secretCreation.data.secret-access-key="$AWS_SECRET_ACCESS_KEY"
--set storage.credentials.accessKeyId="$AWS_ACCESS_KEY_ID" \
--set storage.credentials.secretAccessKey="$AWS_SECRET_ACCESS_KEY"

# Wait for greptimedb cluster to be ready.
while true; do
8 .github/workflows/apidoc.yml (vendored)

@@ -1,7 +1,7 @@
on:
push:
branches:
-- develop
+- main
paths-ignore:
- 'docs/**'
- 'config/**'
@@ -13,14 +13,14 @@ on:
name: Build API docs

env:
-RUST_TOOLCHAIN: nightly-2023-08-07
+RUST_TOOLCHAIN: nightly-2023-12-19

jobs:
apidoc:
runs-on: ubuntu-20.04
steps:
-- uses: actions/checkout@v3
-- uses: arduino/setup-protoc@v1
+- uses: actions/checkout@v4
+- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
32 .github/workflows/dev-build.yml (vendored)
@@ -55,10 +55,18 @@ on:
|
||||
description: Build and push images to DockerHub and ACR
|
||||
required: false
|
||||
default: true
|
||||
cargo_profile:
|
||||
type: choice
|
||||
description: The cargo profile to use in building GreptimeDB.
|
||||
default: nightly
|
||||
options:
|
||||
- dev
|
||||
- release
|
||||
- nightly
|
||||
|
||||
# Use env variables to control all the release process.
|
||||
env:
|
||||
CARGO_PROFILE: nightly
|
||||
CARGO_PROFILE: ${{ inputs.cargo_profile }}
|
||||
|
||||
# Controls whether to run tests, include unit-test, integration-test and sqlness.
|
||||
DISABLE_RUN_TESTS: ${{ inputs.skip_test || vars.DEFAULT_SKIP_TEST }}
|
||||
@@ -93,7 +101,7 @@ jobs:
|
||||
version: ${{ steps.create-version.outputs.version }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -147,12 +155,12 @@ jobs:
|
||||
runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Checkout greptimedb
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: ${{ inputs.repository }}
|
||||
ref: ${{ inputs.commit }}
|
||||
@@ -176,12 +184,12 @@ jobs:
|
||||
runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Checkout greptimedb
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: ${{ inputs.repository }}
|
||||
ref: ${{ inputs.commit }}
|
||||
@@ -208,7 +216,7 @@ jobs:
|
||||
outputs:
|
||||
build-result: ${{ steps.set-build-result.outputs.build-result }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -239,7 +247,7 @@ jobs:
|
||||
runs-on: ubuntu-20.04
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -273,7 +281,7 @@ jobs:
|
||||
]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -298,7 +306,7 @@ jobs:
|
||||
]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -322,14 +330,14 @@ jobs:
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
|
||||
steps:
|
||||
- name: Notifiy nightly build successful result
|
||||
- name: Notifiy dev build successful result
|
||||
uses: slackapi/slack-github-action@v1.23.0
|
||||
if: ${{ needs.release-images-to-dockerhub.outputs.build-result == 'success' }}
|
||||
with:
|
||||
payload: |
|
||||
{"text": "GreptimeDB's ${{ env.NEXT_RELEASE_VERSION }} build has completed successfully."}
|
||||
|
||||
- name: Notifiy nightly build failed result
|
||||
- name: Notifiy dev build failed result
|
||||
uses: slackapi/slack-github-action@v1.23.0
|
||||
if: ${{ needs.release-images-to-dockerhub.outputs.build-result != 'success' }}
|
||||
with:
|
||||
|
||||
156 .github/workflows/develop.yml (vendored)
@@ -9,9 +9,9 @@ on:
|
||||
- '.dockerignore'
|
||||
- 'docker/**'
|
||||
- '.gitignore'
|
||||
- 'grafana/**'
|
||||
push:
|
||||
branches:
|
||||
- develop
|
||||
- main
|
||||
paths-ignore:
|
||||
- 'docs/**'
|
||||
@@ -20,6 +20,7 @@ on:
|
||||
- '.dockerignore'
|
||||
- 'docker/**'
|
||||
- '.gitignore'
|
||||
- 'grafana/**'
|
||||
workflow_dispatch:
|
||||
|
||||
name: CI
|
||||
@@ -29,24 +30,26 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
RUST_TOOLCHAIN: nightly-2023-08-07
|
||||
RUST_TOOLCHAIN: nightly-2023-12-19
|
||||
|
||||
jobs:
|
||||
typos:
|
||||
name: Spell Check with Typos
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: crate-ci/typos@v1.13.10
|
||||
|
||||
check:
|
||||
name: Check
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ windows-latest, ubuntu-20.04 ]
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: arduino/setup-protoc@v1
|
||||
- uses: actions/checkout@v4
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
@@ -54,46 +57,117 @@ jobs:
|
||||
toolchain: ${{ env.RUST_TOOLCHAIN }}
|
||||
- name: Rust Cache
|
||||
uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
# Shares across multiple jobs
|
||||
# Shares with `Clippy` job
|
||||
shared-key: "check-lint"
|
||||
- name: Run cargo check
|
||||
run: cargo check --locked --workspace --all-targets
|
||||
|
||||
toml:
|
||||
name: Toml Check
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-20.04
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: stable
|
||||
- name: Rust Cache
|
||||
uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
# Shares across multiple jobs
|
||||
shared-key: "check-toml"
|
||||
- name: Install taplo
|
||||
run: cargo +stable install taplo-cli --version ^0.8 --locked
|
||||
run: cargo +stable install taplo-cli --version ^0.9 --locked
|
||||
- name: Run taplo
|
||||
run: taplo format --check
|
||||
|
||||
sqlness:
|
||||
name: Sqlness Test
|
||||
if: github.event.pull_request.draft == false
|
||||
build:
|
||||
name: Build GreptimeDB binaries
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ ubuntu-20.04-8-cores ]
|
||||
os: [ ubuntu-20.04 ]
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: arduino/setup-protoc@v1
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- uses: actions/checkout@v4
|
||||
- uses: arduino/setup-protoc@v3
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: ${{ env.RUST_TOOLCHAIN }}
|
||||
- name: Rust Cache
|
||||
uses: Swatinem/rust-cache@v2
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
# Shares across multiple jobs
|
||||
shared-key: "build-binaries"
|
||||
- name: Build greptime binaries
|
||||
shell: bash
|
||||
run: cargo build
|
||||
- name: Pack greptime binaries
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir bins && \
|
||||
mv ./target/debug/greptime bins && \
|
||||
mv ./target/debug/sqlness-runner bins
|
||||
- name: Print greptime binaries info
|
||||
run: ls -lh bins
|
||||
- name: Upload artifacts
|
||||
uses: ./.github/actions/upload-artifacts
|
||||
with:
|
||||
artifacts-dir: bins
|
||||
version: current
|
||||
|
||||
sqlness:
|
||||
name: Sqlness Test
|
||||
needs: build
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ ubuntu-20.04 ]
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Download pre-built binaries
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: bins
|
||||
path: .
|
||||
- name: Unzip binaries
|
||||
run: tar -xvf ./bins.tar.gz
|
||||
- name: Run sqlness
|
||||
run: cargo sqlness
|
||||
run: RUST_BACKTRACE=1 ./bins/sqlness-runner -c ./tests/cases --bins-dir ./bins
|
||||
# FIXME: Logs cannot found be on failure (or even success). Need to figure out the cause.
|
||||
- name: Upload sqlness logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: sqlness-logs
|
||||
path: ${{ runner.temp }}/greptime-*.log
|
||||
retention-days: 3
|
||||
|
||||
sqlness-kafka-wal:
|
||||
name: Sqlness Test with Kafka Wal
|
||||
needs: build
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ ubuntu-20.04 ]
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Download pre-built binaries
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: bins
|
||||
path: .
|
||||
- name: Unzip binaries
|
||||
run: tar -xvf ./bins.tar.gz
|
||||
- name: Setup kafka server
|
||||
working-directory: tests-integration/fixtures/kafka
|
||||
run: docker compose -f docker-compose-standalone.yml up -d --wait
|
||||
- name: Run sqlness
|
||||
run: RUST_BACKTRACE=1 ./bins/sqlness-runner -w kafka -k 127.0.0.1:9092 -c ./tests/cases --bins-dir ./bins
|
||||
# FIXME: Logs cannot be found on failure (or even success). Need to figure out the cause.
|
||||
- name: Upload sqlness logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
@@ -104,12 +178,11 @@ jobs:
|
||||
|
||||
fmt:
|
||||
name: Rustfmt
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-20.04
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: arduino/setup-protoc@v1
|
||||
- uses: actions/checkout@v4
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
@@ -118,17 +191,19 @@ jobs:
|
||||
components: rustfmt
|
||||
- name: Rust Cache
|
||||
uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
# Shares across multiple jobs
|
||||
shared-key: "check-rust-fmt"
|
||||
- name: Run cargo fmt
|
||||
run: cargo fmt --all -- --check
|
||||
|
||||
clippy:
|
||||
name: Clippy
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-20.04
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: arduino/setup-protoc@v1
|
||||
- uses: actions/checkout@v4
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
@@ -137,6 +212,10 @@ jobs:
|
||||
components: clippy
|
||||
- name: Rust Cache
|
||||
uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
# Shares across multiple jobs
|
||||
# Shares with `Check` job
|
||||
shared-key: "check-lint"
|
||||
- name: Run cargo clippy
|
||||
run: cargo clippy --workspace --all-targets -- -D warnings
|
||||
|
||||
@@ -145,8 +224,8 @@ jobs:
|
||||
runs-on: ubuntu-20.04-8-cores
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: arduino/setup-protoc@v1
|
||||
- uses: actions/checkout@v4
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- uses: KyleMayes/install-llvm-action@v1
|
||||
@@ -159,17 +238,26 @@ jobs:
|
||||
components: llvm-tools-preview
|
||||
- name: Rust Cache
|
||||
uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
# Shares cross multiple jobs
|
||||
shared-key: "coverage-test"
|
||||
- name: Install latest nextest release
|
||||
uses: taiki-e/install-action@nextest
|
||||
- name: Install cargo-llvm-cov
|
||||
uses: taiki-e/install-action@cargo-llvm-cov
|
||||
- name: Install Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Install PyArrow Package
|
||||
run: pip install pyarrow
|
||||
- name: Install cargo-llvm-cov
|
||||
uses: taiki-e/install-action@cargo-llvm-cov
|
||||
- name: Collect coverage data
|
||||
- name: Setup etcd server
|
||||
working-directory: tests-integration/fixtures/etcd
|
||||
run: docker compose -f docker-compose-standalone.yml up -d --wait
|
||||
- name: Setup kafka server
|
||||
working-directory: tests-integration/fixtures/kafka
|
||||
run: docker compose -f docker-compose-standalone.yml up -d --wait
|
||||
- name: Run nextest cases
|
||||
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F pyo3_backend -F dashboard
|
||||
env:
|
||||
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
|
||||
@@ -179,6 +267,8 @@ jobs:
|
||||
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
|
||||
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
|
||||
GT_S3_REGION: ${{ secrets.S3_REGION }}
|
||||
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
|
||||
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
|
||||
UNITTEST_LOG_DIR: "__unittest_logs"
|
||||
- name: Codecov upload
|
||||
uses: codecov/codecov-action@v2
|
||||
|
||||
4 .github/workflows/doc-issue.yml (vendored)

@@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: create an issue in doc repo
-uses: dacbd/create-issue-action@main
+uses: dacbd/create-issue-action@v1.2.1
with:
owner: GreptimeTeam
repo: docs
@@ -28,7 +28,7 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: create an issue in cloud repo
-uses: dacbd/create-issue-action@main
+uses: dacbd/create-issue-action@v1.2.1
with:
owner: GreptimeTeam
repo: greptimedb-cloud
36 .github/workflows/doc-label.yml (vendored, new file)

@@ -0,0 +1,36 @@
+name: "PR Doc Labeler"
+on:
+pull_request_target:
+types: [opened, edited, synchronize, ready_for_review, auto_merge_enabled, labeled, unlabeled]
+
+permissions:
+pull-requests: write
+contents: read
+
+jobs:
+triage:
+if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
+runs-on: ubuntu-latest
+steps:
+- uses: github/issue-labeler@v3.4
+with:
+configuration-path: .github/doc-label-config.yml
+enable-versioned-regex: false
+repo-token: ${{ secrets.GITHUB_TOKEN }}
+sync-labels: 1
+- name: create an issue in doc repo
+uses: dacbd/create-issue-action@v1.2.1
+if: ${{ github.event.action == 'opened' && contains(github.event.pull_request.body, '- [ ] This PR does not require documentation updates.') }}
+with:
+owner: GreptimeTeam
+repo: docs
+token: ${{ secrets.DOCS_REPO_TOKEN }}
+title: Update docs for ${{ github.event.issue.title || github.event.pull_request.title }}
+body: |
+A document change request is generated from
+${{ github.event.issue.html_url || github.event.pull_request.html_url }}
+- name: Check doc labels
+uses: docker://agilepathway/pull-request-label-checker:latest
+with:
+one_of: Doc update required,Doc not needed
+repo_token: ${{ secrets.GITHUB_TOKEN }}
10 .github/workflows/docs.yml (vendored)
@@ -9,9 +9,9 @@ on:
|
||||
- '.dockerignore'
|
||||
- 'docker/**'
|
||||
- '.gitignore'
|
||||
- 'grafana/**'
|
||||
push:
|
||||
branches:
|
||||
- develop
|
||||
- main
|
||||
paths:
|
||||
- 'docs/**'
|
||||
@@ -20,6 +20,7 @@ on:
|
||||
- '.dockerignore'
|
||||
- 'docker/**'
|
||||
- '.gitignore'
|
||||
- 'grafana/**'
|
||||
workflow_dispatch:
|
||||
|
||||
name: CI
|
||||
@@ -32,39 +33,34 @@ jobs:
|
||||
name: Spell Check with Typos
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: crate-ci/typos@v1.13.10
|
||||
|
||||
check:
|
||||
name: Check
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- run: 'echo "No action required"'
|
||||
|
||||
fmt:
|
||||
name: Rustfmt
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- run: 'echo "No action required"'
|
||||
|
||||
clippy:
|
||||
name: Clippy
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- run: 'echo "No action required"'
|
||||
|
||||
coverage:
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- run: 'echo "No action required"'
|
||||
|
||||
sqlness:
|
||||
name: Sqlness Test
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- run: 'echo "No action required"'
|
||||
|
||||
6 .github/workflows/license.yaml (vendored)

@@ -3,7 +3,7 @@ name: License checker
on:
push:
branches:
-- develop
+- main
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
jobs:
@@ -11,6 +11,6 @@ jobs:
runs-on: ubuntu-20.04
name: license-header-check
steps:
-- uses: actions/checkout@v2
+- uses: actions/checkout@v4
- name: Check License Header
-uses: korandoru/hawkeye@v3
+uses: korandoru/hawkeye@v4
14 .github/workflows/nightly-build.yml (vendored)
@@ -85,7 +85,7 @@ jobs:
|
||||
version: ${{ steps.create-version.outputs.version }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
]
|
||||
runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -156,7 +156,7 @@ jobs:
|
||||
]
|
||||
runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -179,7 +179,7 @@ jobs:
|
||||
outputs:
|
||||
nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -211,7 +211,7 @@ jobs:
|
||||
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -245,7 +245,7 @@ jobs:
|
||||
]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -270,7 +270,7 @@ jobs:
|
||||
]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
|
||||
14 .github/workflows/nightly-ci.yml (vendored)
@@ -12,19 +12,20 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
RUST_TOOLCHAIN: nightly-2023-08-07
|
||||
RUST_TOOLCHAIN: nightly-2023-12-19
|
||||
|
||||
jobs:
|
||||
sqlness:
|
||||
name: Sqlness Test
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ windows-latest-8-cores ]
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- uses: actions/checkout@v4.1.0
|
||||
- uses: arduino/setup-protoc@v1
|
||||
- uses: actions/checkout@v4
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
@@ -51,12 +52,13 @@ jobs:
|
||||
retention-days: 3
|
||||
|
||||
test-on-windows:
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
runs-on: windows-latest-8-cores
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
- uses: actions/checkout@v4.1.0
|
||||
- uses: arduino/setup-protoc@v1
|
||||
- uses: actions/checkout@v4
|
||||
- uses: arduino/setup-protoc@v3
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Install Rust toolchain
|
||||
@@ -69,7 +71,7 @@ jobs:
|
||||
- name: Install Cargo Nextest
|
||||
uses: taiki-e/install-action@nextest
|
||||
- name: Install Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Install PyArrow Package
|
||||
|
||||
@@ -9,10 +9,11 @@ on:
|
||||
jobs:
|
||||
sqlness-test:
|
||||
name: Run sqlness test
|
||||
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
|
||||
4 .github/workflows/pr-title-checker.yml (vendored)
@@ -13,7 +13,7 @@ jobs:
|
||||
runs-on: ubuntu-20.04
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- uses: thehanimo/pr-title-checker@v1.3.4
|
||||
- uses: thehanimo/pr-title-checker@v1.4.2
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
pass_on_octokit_error: false
|
||||
@@ -22,7 +22,7 @@ jobs:
|
||||
runs-on: ubuntu-20.04
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- uses: thehanimo/pr-title-checker@v1.3.4
|
||||
- uses: thehanimo/pr-title-checker@v1.4.2
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
pass_on_octokit_error: false
|
||||
|
||||
@@ -30,7 +30,7 @@ jobs:
|
||||
runs-on: ubuntu-20.04-16-cores
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
|
||||
65 .github/workflows/release.yml (vendored)
@@ -82,7 +82,7 @@ on:
|
||||
# Use env variables to control all the release process.
|
||||
env:
|
||||
# The arguments of building greptime.
|
||||
RUST_TOOLCHAIN: nightly-2023-08-07
|
||||
RUST_TOOLCHAIN: nightly-2023-12-19
|
||||
CARGO_PROFILE: nightly
|
||||
|
||||
# Controls whether to run tests, include unit-test, integration-test and sqlness.
|
||||
@@ -91,7 +91,7 @@ env:
|
||||
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
|
||||
NIGHTLY_RELEASE_PREFIX: nightly
|
||||
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
|
||||
NEXT_RELEASE_VERSION: v0.5.0
|
||||
NEXT_RELEASE_VERSION: v0.7.0
|
||||
|
||||
jobs:
|
||||
allocate-runners:
|
||||
@@ -114,7 +114,7 @@ jobs:
|
||||
version: ${{ steps.create-version.outputs.version }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -168,7 +168,7 @@ jobs:
|
||||
]
|
||||
runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -187,7 +187,7 @@ jobs:
|
||||
]
|
||||
runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -226,7 +226,7 @@ jobs:
|
||||
]
|
||||
if: ${{ inputs.build_macos_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -240,6 +240,11 @@ jobs:
|
||||
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
|
||||
artifacts-dir: ${{ matrix.artifacts-dir-prefix }}-${{ needs.allocate-runners.outputs.version }}
|
||||
|
||||
- name: Set build macos result
|
||||
id: set-build-macos-result
|
||||
run: |
|
||||
echo "build-macos-result=success" >> $GITHUB_OUTPUT
|
||||
|
||||
build-windows-artifacts:
|
||||
name: Build Windows artifacts
|
||||
strategy:
|
||||
@@ -262,7 +267,7 @@ jobs:
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -276,6 +281,11 @@ jobs:
|
||||
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
|
||||
artifacts-dir: ${{ matrix.artifacts-dir-prefix }}-${{ needs.allocate-runners.outputs.version }}
|
||||
|
||||
- name: Set build windows result
|
||||
id: set-build-windows-result
|
||||
run: |
|
||||
echo "build-windows-result=success" >> $GITHUB_OUTPUT
|
||||
|
||||
release-images-to-dockerhub:
|
||||
name: Build and push images to DockerHub
|
||||
if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }}
|
||||
@@ -286,7 +296,7 @@ jobs:
|
||||
]
|
||||
runs-on: ubuntu-2004-16-cores
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -299,6 +309,11 @@ jobs:
|
||||
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
version: ${{ needs.allocate-runners.outputs.version }}
|
||||
|
||||
- name: Set build image result
|
||||
id: set-image-build-result
|
||||
run: |
|
||||
echo "build-image-result=success" >> $GITHUB_OUTPUT
|
||||
|
||||
release-cn-artifacts:
|
||||
name: Release artifacts to CN region
|
||||
if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }}
|
||||
@@ -316,7 +331,7 @@ jobs:
|
||||
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -352,7 +367,7 @@ jobs:
|
||||
]
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -375,7 +390,7 @@ jobs:
|
||||
]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -400,7 +415,7 @@ jobs:
|
||||
]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -413,3 +428,29 @@ jobs:
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ vars.EC2_RUNNER_REGION }}
|
||||
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
|
||||
|
||||
notification:
|
||||
if: ${{ always() }} # Not requiring successful dependent jobs, always run.
|
||||
name: Send notification to Greptime team
|
||||
needs: [
|
||||
release-images-to-dockerhub,
|
||||
build-macos-artifacts,
|
||||
build-windows-artifacts,
|
||||
]
|
||||
runs-on: ubuntu-20.04
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
|
||||
steps:
|
||||
- name: Notifiy release successful result
|
||||
uses: slackapi/slack-github-action@v1.25.0
|
||||
if: ${{ needs.release-images-to-dockerhub.outputs.build-image-result == 'success' && needs.build-windows-artifacts.outputs.build-windows-result == 'success' && needs.build-macos-artifacts.outputs.build-macos-result == 'success' }}
|
||||
with:
|
||||
payload: |
|
||||
{"text": "GreptimeDB's release version has completed successfully."}
|
||||
|
||||
- name: Notifiy release failed result
|
||||
uses: slackapi/slack-github-action@v1.25.0
|
||||
if: ${{ needs.release-images-to-dockerhub.outputs.build-image-result != 'success' || needs.build-windows-artifacts.outputs.build-windows-result != 'success' || needs.build-macos-artifacts.outputs.build-macos-result != 'success' }}
|
||||
with:
|
||||
payload: |
|
||||
{"text": "GreptimeDB's release version has failed, please check 'https://github.com/GreptimeTeam/greptimedb/actions/workflows/release.yml'."}
|
||||
|
||||
26 .github/workflows/size-label.yml (vendored)
@@ -1,26 +0,0 @@
|
||||
name: size-labeler
|
||||
|
||||
on: [pull_request]
|
||||
|
||||
jobs:
|
||||
labeler:
|
||||
runs-on: ubuntu-latest
|
||||
name: Label the PR size
|
||||
steps:
|
||||
- uses: codelytv/pr-size-labeler@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
s_label: 'Size: S'
|
||||
s_max_size: '100'
|
||||
m_label: 'Size: M'
|
||||
m_max_size: '500'
|
||||
l_label: 'Size: L'
|
||||
l_max_size: '1000'
|
||||
xl_label: 'Size: XL'
|
||||
fail_if_xl: 'false'
|
||||
message_if_xl: >
|
||||
This PR exceeds the recommended size of 1000 lines.
|
||||
Please make sure you are NOT addressing multiple issues with one PR.
|
||||
Note this PR might be rejected due to its size.
|
||||
github_api_url: 'api.github.com'
|
||||
files_to_ignore: 'Cargo.lock'
|
||||
@@ -10,7 +10,7 @@ Follow our [README](https://github.com/GreptimeTeam/greptimedb#readme) to get the

It can feel intimidating to contribute to a complex project, but it can also be exciting and fun. These general notes will help everyone participate in this communal activity.

-- Follow the [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/develop/CODE_OF_CONDUCT.md)
+- Follow the [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/main/CODE_OF_CONDUCT.md)
- Small changes make huge differences. We will happily accept a PR making a single character change if it helps move forward. Don't wait to have everything working.
- Check the closed issues before opening your issue.
- Try to follow the existing style of the code.
@@ -26,7 +26,7 @@ Pull requests are great, but we accept all kinds of other help if you like. Such

## Code of Conduct

-Also, there are things that we are not looking for because they don't match the goals of the product or benefit the community. Please read [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/develop/CODE_OF_CONDUCT.md); we hope everyone can keep good manners and become an honored member.
+Also, there are things that we are not looking for because they don't match the goals of the product or benefit the community. Please read [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/main/CODE_OF_CONDUCT.md); we hope everyone can keep good manners and become an honored member.

## License
2948 Cargo.lock (generated): file diff suppressed because it is too large
47 Cargo.toml
@@ -29,9 +29,11 @@ members = [
|
||||
"src/common/time",
|
||||
"src/common/decimal",
|
||||
"src/common/version",
|
||||
"src/common/wal",
|
||||
"src/datanode",
|
||||
"src/datatypes",
|
||||
"src/file-engine",
|
||||
"src/flow",
|
||||
"src/frontend",
|
||||
"src/log-store",
|
||||
"src/meta-client",
|
||||
@@ -43,35 +45,45 @@ members = [
|
||||
"src/partition",
|
||||
"src/plugins",
|
||||
"src/promql",
|
||||
"src/puffin",
|
||||
"src/query",
|
||||
"src/script",
|
||||
"src/servers",
|
||||
"src/session",
|
||||
"src/sql",
|
||||
"src/storage",
|
||||
"src/store-api",
|
||||
"src/table",
|
||||
"src/index",
|
||||
"tests-fuzz",
|
||||
"tests-integration",
|
||||
"tests/runner",
|
||||
]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "0.4.2"
|
||||
version = "0.6.0"
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
ahash = { version = "0.8", features = ["compile-time-rng"] }
|
||||
aquamarine = "0.3"
|
||||
arrow = { version = "47.0" }
|
||||
arrow-array = "47.0"
|
||||
arrow-flight = "47.0"
|
||||
arrow-ipc = "47.0"
|
||||
arrow-schema = { version = "47.0", features = ["serde"] }
|
||||
async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
axum = { version = "0.6", features = ["headers"] }
|
||||
base64 = "0.21"
|
||||
bigdecimal = "0.4.2"
|
||||
bitflags = "2.4.1"
|
||||
bytemuck = "1.12"
|
||||
bytes = { version = "1.5", features = ["serde"] }
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
clap = { version = "4.4", features = ["derive"] }
|
||||
dashmap = "5.4"
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
|
||||
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
|
||||
@@ -81,14 +93,17 @@ datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev =
|
||||
datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
|
||||
derive_builder = "0.12"
|
||||
etcd-client = "0.12"
|
||||
fst = "0.4.7"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "7eb2e78be7a104d2582fbea0bcb1e019407da702" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "96f1f0404f421ee560a4310c73c5071e49168168" }
|
||||
humantime-serde = "1.1"
|
||||
itertools = "0.10"
|
||||
lazy_static = "1.4"
|
||||
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" }
|
||||
mockall = "0.11.4"
|
||||
moka = "0.12"
|
||||
num_cpus = "1.16"
|
||||
once_cell = "1.18"
|
||||
opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.git", rev = "33841b38dda79b15f2024952be5f32533325ca02", features = [
|
||||
"gen-tonic",
|
||||
@@ -97,30 +112,36 @@ opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.gi
|
||||
] }
|
||||
parquet = "47.0"
|
||||
paste = "1.0"
|
||||
pin-project = "1.0"
|
||||
prometheus = { version = "0.13.3", features = ["process"] }
|
||||
prost = "0.12"
|
||||
raft-engine = { git = "https://github.com/tikv/raft-engine.git", rev = "22dfb426cd994602b57725ef080287d3e53db479" }
|
||||
raft-engine = { version = "0.4.1", default-features = false }
|
||||
rand = "0.8"
|
||||
regex = "1.8"
|
||||
regex-automata = { version = "0.2", features = ["transducer"] }
|
||||
reqwest = { version = "0.11", default-features = false, features = [
|
||||
"json",
|
||||
"rustls-tls-native-roots",
|
||||
"stream",
|
||||
] }
|
||||
rust_decimal = "1.32.0"
|
||||
rskafka = "0.5"
|
||||
rust_decimal = "1.33"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
smallvec = "1"
|
||||
serde_with = "3"
|
||||
smallvec = { version = "1", features = ["serde"] }
|
||||
snafu = "0.7"
|
||||
sysinfo = "0.30"
|
||||
# on branch v0.38.x
|
||||
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "0fbae07d0c46dc18e3381c406d8b9b8abef6b1fd", features = [
|
||||
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "6a93567ae38d42be5c8d08b13c8ff4dde26502ef", features = [
|
||||
"visitor",
|
||||
] }
|
||||
strum = { version = "0.25", features = ["derive"] }
|
||||
tempfile = "3"
|
||||
tokio = { version = "1.28", features = ["full"] }
|
||||
tokio-stream = { version = "0.1" }
|
||||
tokio-util = { version = "0.7", features = ["io-util", "compat"] }
|
||||
toml = "0.7"
|
||||
toml = "0.8.8"
|
||||
tonic = { version = "0.10", features = ["tls"] }
|
||||
uuid = { version = "1", features = ["serde", "v4", "fast-rng"] }
|
||||
|
||||
@@ -134,6 +155,7 @@ common-base = { path = "src/common/base" }
|
||||
common-catalog = { path = "src/common/catalog" }
|
||||
common-config = { path = "src/common/config" }
|
||||
common-datasource = { path = "src/common/datasource" }
|
||||
common-decimal = { path = "src/common/decimal" }
|
||||
common-error = { path = "src/common/error" }
|
||||
common-function = { path = "src/common/function" }
|
||||
common-greptimedb-telemetry = { path = "src/common/greptimedb-telemetry" }
|
||||
@@ -142,7 +164,6 @@ common-grpc-expr = { path = "src/common/grpc-expr" }
|
||||
common-macro = { path = "src/common/macro" }
|
||||
common-mem-prof = { path = "src/common/mem-prof" }
|
||||
common-meta = { path = "src/common/meta" }
|
||||
common-pprof = { path = "src/common/pprof" }
|
||||
common-procedure = { path = "src/common/procedure" }
|
||||
common-procedure-test = { path = "src/common/procedure-test" }
|
||||
common-query = { path = "src/common/query" }
|
||||
@@ -152,26 +173,28 @@ common-telemetry = { path = "src/common/telemetry" }
|
||||
common-test-util = { path = "src/common/test-util" }
|
||||
common-time = { path = "src/common/time" }
|
||||
common-version = { path = "src/common/version" }
|
||||
common-wal = { path = "src/common/wal" }
|
||||
datanode = { path = "src/datanode" }
|
||||
datatypes = { path = "src/datatypes" }
|
||||
file-engine = { path = "src/file-engine" }
|
||||
frontend = { path = "src/frontend" }
|
||||
index = { path = "src/index" }
|
||||
log-store = { path = "src/log-store" }
|
||||
meta-client = { path = "src/meta-client" }
|
||||
meta-srv = { path = "src/meta-srv" }
|
||||
mito = { path = "src/mito" }
|
||||
metric-engine = { path = "src/metric-engine" }
|
||||
mito2 = { path = "src/mito2" }
|
||||
object-store = { path = "src/object-store" }
|
||||
operator = { path = "src/operator" }
|
||||
partition = { path = "src/partition" }
|
||||
plugins = { path = "src/plugins" }
|
||||
promql = { path = "src/promql" }
|
||||
puffin = { path = "src/puffin" }
|
||||
query = { path = "src/query" }
|
||||
script = { path = "src/script" }
|
||||
servers = { path = "src/servers" }
|
||||
session = { path = "src/session" }
|
||||
sql = { path = "src/sql" }
|
||||
storage = { path = "src/storage" }
|
||||
store-api = { path = "src/store-api" }
|
||||
substrait = { path = "src/common/substrait" }
|
||||
table = { path = "src/table" }
|
||||
@@ -181,7 +204,7 @@ git = "https://github.com/GreptimeTeam/greptime-meter.git"
|
||||
rev = "abbd357c1e193cd270ea65ee7652334a150b628f"
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
debug = 1
|
||||
|
||||
[profile.nightly]
|
||||
inherits = "release"
|
||||
|
||||
Makefile (11 changes)
@@ -65,7 +65,7 @@ endif
|
||||
build: ## Build debug version greptime.
|
||||
cargo ${CARGO_EXTENSION} build ${CARGO_BUILD_OPTS}
|
||||
|
||||
.POHNY: build-by-dev-builder
|
||||
.PHONY: build-by-dev-builder
|
||||
build-by-dev-builder: ## Build greptime by dev-builder.
|
||||
docker run --network=host \
|
||||
-v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry \
|
||||
@@ -144,11 +144,12 @@ multi-platform-buildx: ## Create buildx multi-platform builder.
|
||||
docker buildx inspect ${BUILDX_BUILDER_NAME} || docker buildx create --name ${BUILDX_BUILDER_NAME} --driver docker-container --bootstrap --use
|
||||
|
||||
##@ Test
|
||||
.PHONY: test
|
||||
test: nextest ## Run unit and integration tests.
|
||||
cargo nextest run ${NEXTEST_OPTS}
|
||||
|
||||
.PHONY: nextest ## Install nextest tools.
|
||||
nextest:
|
||||
.PHONY: nextest
|
||||
nextest: ## Install nextest tools.
|
||||
cargo --list | grep nextest || cargo install cargo-nextest --locked
|
||||
|
||||
.PHONY: sqlness-test
|
||||
@@ -157,11 +158,11 @@ sqlness-test: ## Run sqlness test.
|
||||
|
||||
.PHONY: check
|
||||
check: ## Cargo check all the targets.
|
||||
cargo check --workspace --all-targets
|
||||
cargo check --workspace --all-targets --all-features
|
||||
|
||||
.PHONY: clippy
|
||||
clippy: ## Check clippy rules.
|
||||
cargo clippy --workspace --all-targets -F pyo3_backend -- -D warnings
|
||||
cargo clippy --workspace --all-targets --all-features -- -D warnings
|
||||
|
||||
.PHONY: fmt-check
|
||||
fmt-check: ## Check code format.
|
||||
|
||||
README.md (64 changes)
@@ -1,8 +1,8 @@
|
||||
<p align="center">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png">
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding-dark.png">
|
||||
<img alt="GreptimeDB Logo" src="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png" width="400px">
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding.png">
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding-dark.png">
|
||||
<img alt="GreptimeDB Logo" src="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding.png" width="400px">
|
||||
</picture>
|
||||
</p>
|
||||
|
||||
@@ -12,11 +12,11 @@
|
||||
</h3>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://codecov.io/gh/GrepTimeTeam/greptimedb"><img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/develop/graph/badge.svg?token=FITFDI3J3C"></img></a>
|
||||
<a href="https://codecov.io/gh/GrepTimeTeam/greptimedb"><img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/main/graph/badge.svg?token=FITFDI3J3C"></img></a>
|
||||
|
||||
<a href="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml"><img src="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml/badge.svg" alt="CI"></img></a>
|
||||
|
||||
<a href="https://github.com/greptimeTeam/greptimedb/blob/develop/LICENSE"><img src="https://img.shields.io/github/license/greptimeTeam/greptimedb"></a>
|
||||
<a href="https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE"><img src="https://img.shields.io/github/license/greptimeTeam/greptimedb"></a>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
@@ -27,31 +27,19 @@
|
||||
<a href="https://greptime.com/slack"><img src="https://img.shields.io/badge/slack-GreptimeDB-0abd59?logo=slack" alt="slack" /></a>
|
||||
</p>
|
||||
|
||||
## Upcoming Event
|
||||
Come and meet us in **KubeCon + CloudNativeCon North America 2023!**
|
||||
<p align="center">
|
||||
<picture>
|
||||
<img alt="KubeCon + CloudNativeCon North Logo" src="./docs/banner/KCCNC_NA_2023_1000x200_Email Banner.png" width="800px">
|
||||
</picture>
|
||||
</p>
|
||||
|
||||
## What is GreptimeDB
|
||||
|
||||
GreptimeDB is an open-source time-series database with a special focus on
|
||||
scalability, analytical capabilities and efficiency. It's designed to work on
|
||||
infrastructure of the cloud era, and users benefit from its elasticity and commodity
|
||||
storage.
|
||||
GreptimeDB is an open-source time-series database focusing on efficiency, scalability, and analytical capabilities.
|
||||
It's designed to work on infrastructure of the cloud era, and users benefit from its elasticity and commodity storage.
|
||||
|
||||
Our core developers have been building time-series data platform
|
||||
for years. Based on their best-practices, GreptimeDB is born to give you:
|
||||
Our core developers have been building time-series data platforms for years. Based on their best-practices, GreptimeDB is born to give you:
|
||||
|
||||
- A standalone binary that scales to highly-available distributed cluster, providing a transparent experience for cluster users
|
||||
- Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends
|
||||
- Flexible indexes, tackling high cardinality issues down
|
||||
- Distributed, parallel query execution, leveraging elastic computing resource
|
||||
- Native SQL, and Python scripting for advanced analytical scenarios
|
||||
- Widely adopted database protocols and APIs, native PromQL supports
|
||||
- Extensible table engine architecture for extensive workloads
|
||||
- Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends, particularly cloud object storage with 50x cost efficiency.
|
||||
- Fully open-source distributed cluster architecture that harnesses the power of cloud-native elastic computing resources.
|
||||
- Seamless scalability from a standalone binary at edge to a robust, highly available distributed cluster in cloud, with a transparent experience for both developers and administrators.
|
||||
- Native SQL and PromQL for queries, and Python scripting to facilitate complex analytical tasks.
|
||||
- Flexible indexing capabilities and a distributed, parallel-processing query engine, tackling high-cardinality issues.
|
||||
- Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc.
|
||||
|
||||
## Quick Start
|
||||
|
||||
@@ -108,7 +96,7 @@ Please see the online document site for more installation options and [operation
|
||||
|
||||
### Get started
|
||||
|
||||
Read the [complete getting started guide](https://docs.greptime.com/getting-started/try-out-greptimedb) on our [official document site](https://docs.greptime.com/).
|
||||
Read the [complete getting started guide](https://docs.greptime.com/getting-started/overview) on our [official document site](https://docs.greptime.com/).
|
||||
|
||||
To write and query data, GreptimeDB is compatible with multiple [protocols and clients](https://docs.greptime.com/user-guide/clients/overview).
|
||||
|
||||
@@ -117,7 +105,7 @@ To write and query data, GreptimeDB is compatible with multiple [protocols and c
|
||||
### Installation
|
||||
|
||||
- [Pre-built Binaries](https://greptime.com/download):
|
||||
For Linux and macOS, you can easily download pre-built binaries including official releases and nightly builds that are ready to use.
|
||||
For Linux and macOS, you can easily download pre-built binaries including official releases and nightly builds that are ready to use.
|
||||
In most cases, downloading the version without PyO3 is sufficient. However, if you plan to run scripts in CPython (and use Python packages like NumPy and Pandas), you will need to download the version with PyO3 and install a Python interpreter whose version matches the one the PyO3 build was compiled against.
|
||||
We recommend using virtualenv for the installation process to manage multiple Python versions.
|
||||
- [Docker Images](https://hub.docker.com/r/greptime/greptimedb)(**recommended**): pre-built
|
||||
@@ -139,10 +127,15 @@ To write and query data, GreptimeDB is compatible with multiple [protocols and c
|
||||
|
||||
- [GreptimeDB C++ Client](https://github.com/GreptimeTeam/greptimedb-client-cpp)
|
||||
- [GreptimeDB Erlang Client](https://github.com/GreptimeTeam/greptimedb-client-erl)
|
||||
- [GreptimeDB Go Client](https://github.com/GreptimeTeam/greptimedb-client-go)
|
||||
- [GreptimeDB Java Client](https://github.com/GreptimeTeam/greptimedb-client-java)
|
||||
- [GreptimeDB Go Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-go)
|
||||
- [GreptimeDB Java Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-java)
|
||||
- [GreptimeDB Python Client](https://github.com/GreptimeTeam/greptimedb-client-py) (WIP)
|
||||
- [GreptimeDB Rust Client](https://github.com/GreptimeTeam/greptimedb-client-rust)
|
||||
- [GreptimeDB JavaScript Client](https://github.com/GreptimeTeam/greptime-js-sdk)
|
||||
|
||||
### Grafana Dashboard
|
||||
|
||||
Our official Grafana dashboard is available at [grafana](./grafana/README.md) directory.
|
||||
|
||||
## Project Status
|
||||
|
||||
@@ -172,18 +165,17 @@ In addition, you may:
|
||||
|
||||
## License
|
||||
|
||||
GreptimeDB uses the [Apache 2.0 license][1] to strike a balance between
|
||||
GreptimeDB uses the [Apache License 2.0](https://apache.org/licenses/LICENSE-2.0.txt) to strike a balance between
|
||||
open contributions and allowing you to use the software however you want.
|
||||
|
||||
[1]: <https://github.com/greptimeTeam/greptimedb/blob/develop/LICENSE>
|
||||
|
||||
## Contributing
|
||||
|
||||
Please refer to [contribution guidelines](CONTRIBUTING.md) for more information.
|
||||
|
||||
## Acknowledgement
|
||||
- GreptimeDB uses [Apache Arrow](https://arrow.apache.org/) as the memory model and [Apache Parquet](https://parquet.apache.org/) as the persistent file format.
|
||||
- GreptimeDB's query engine is powered by [Apache Arrow DataFusion](https://github.com/apache/arrow-datafusion).
|
||||
- [Apache OpenDAL (incubating)](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer.
|
||||
|
||||
- GreptimeDB uses [Apache Arrow™](https://arrow.apache.org/) as the memory model and [Apache Parquet™](https://parquet.apache.org/) as the persistent file format.
|
||||
- GreptimeDB's query engine is powered by [Apache Arrow DataFusion™](https://arrow.apache.org/datafusion/).
|
||||
- [Apache OpenDAL™](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer.
|
||||
- GreptimeDB's meta service is based on [etcd](https://etcd.io/).
|
||||
- GreptimeDB uses [RustPython](https://github.com/RustPython/RustPython) for experimental embedded python scripting.
|
||||
|
||||
@@ -7,7 +7,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
arrow.workspace = true
|
||||
chrono.workspace = true
|
||||
clap = { version = "4.0", features = ["derive"] }
|
||||
clap.workspace = true
|
||||
client.workspace = true
|
||||
futures-util.workspace = true
|
||||
indicatif = "0.17.1"
|
||||
|
||||
@@ -152,6 +152,7 @@ fn convert_record_batch(record_batch: RecordBatch) -> (Vec<Column>, u32) {
|
||||
.unwrap_or_default(),
|
||||
datatype: datatype.into(),
|
||||
semantic_type: semantic_type as i32,
|
||||
..Default::default()
|
||||
};
|
||||
columns.push(column);
|
||||
}
|
||||
@@ -257,7 +258,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
catalog_name: CATALOG_NAME.to_string(),
|
||||
schema_name: SCHEMA_NAME.to_string(),
|
||||
table_name: table_name.to_string(),
|
||||
desc: "".to_string(),
|
||||
desc: String::default(),
|
||||
column_defs: vec![
|
||||
ColumnDef {
|
||||
name: "VendorID".to_string(),
|
||||
@@ -266,6 +267,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Tag as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "tpep_pickup_datetime".to_string(),
|
||||
@@ -274,6 +276,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Timestamp as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "tpep_dropoff_datetime".to_string(),
|
||||
@@ -282,6 +285,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "passenger_count".to_string(),
|
||||
@@ -290,6 +294,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "trip_distance".to_string(),
|
||||
@@ -298,6 +303,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "RatecodeID".to_string(),
|
||||
@@ -306,6 +312,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "store_and_fwd_flag".to_string(),
|
||||
@@ -314,6 +321,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "PULocationID".to_string(),
|
||||
@@ -322,6 +330,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "DOLocationID".to_string(),
|
||||
@@ -330,6 +339,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "payment_type".to_string(),
|
||||
@@ -338,6 +348,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "fare_amount".to_string(),
|
||||
@@ -346,6 +357,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "extra".to_string(),
|
||||
@@ -354,6 +366,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "mta_tax".to_string(),
|
||||
@@ -362,6 +375,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "tip_amount".to_string(),
|
||||
@@ -370,6 +384,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "tolls_amount".to_string(),
|
||||
@@ -378,6 +393,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "improvement_surcharge".to_string(),
|
||||
@@ -386,6 +402,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "total_amount".to_string(),
|
||||
@@ -394,6 +411,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "congestion_surcharge".to_string(),
|
||||
@@ -402,6 +420,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "airport_fee".to_string(),
|
||||
@@ -410,6 +429,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
],
|
||||
time_index: "tpep_pickup_datetime".to_string(),
|
||||
|
||||
@@ -12,6 +12,10 @@ rpc_runtime_size = 8
|
||||
# It will block the datanode start if it can't receive leases in the heartbeat from metasrv.
|
||||
require_lease_before_startup = false
|
||||
|
||||
# Initialize all regions in the background during the startup.
|
||||
# By default, it provides services after all regions have been initialized.
|
||||
init_regions_in_background = false
|
||||
|
||||
[heartbeat]
|
||||
# Interval for sending heartbeat messages to the Metasrv, 3 seconds by default.
|
||||
interval = "3s"
|
||||
@@ -29,9 +33,11 @@ connect_timeout = "1s"
|
||||
# `TCP_NODELAY` option for accepted connections, true by default.
|
||||
tcp_nodelay = true
|
||||
|
||||
# WAL options, see `standalone.example.toml`.
|
||||
# WAL options.
|
||||
[wal]
|
||||
# WAL data directory
|
||||
provider = "raft_engine"
|
||||
|
||||
# Raft-engine wal options, see `standalone.example.toml`.
|
||||
# dir = "/tmp/greptimedb/wal"
|
||||
file_size = "256MB"
|
||||
purge_threshold = "4GB"
|
||||
@@ -39,10 +45,22 @@ purge_interval = "10m"
|
||||
read_batch_size = 128
|
||||
sync_write = false
|
||||
|
||||
# Kafka wal options, see `standalone.example.toml`.
|
||||
# broker_endpoints = ["127.0.0.1:9092"]
|
||||
# Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
# max_batch_size = "1MB"
|
||||
# linger = "200ms"
|
||||
# consumer_wait_timeout = "100ms"
|
||||
# backoff_init = "500ms"
|
||||
# backoff_max = "10s"
|
||||
# backoff_base = 2
|
||||
# backoff_deadline = "5mins"
|
||||
|
||||
# Storage options, see `standalone.example.toml`.
|
||||
[storage]
|
||||
# The working home directory.
|
||||
data_home = "/tmp/greptimedb/"
|
||||
# Storage type.
|
||||
type = "File"
|
||||
# TTL for all tables. Disabled by default.
|
||||
# global_ttl = "7d"
|
||||
@@ -53,32 +71,11 @@ type = "File"
|
||||
# The local file cache capacity in bytes.
|
||||
# cache_capacity = "256MB"
|
||||
|
||||
# Compaction options, see `standalone.example.toml`.
|
||||
[storage.compaction]
|
||||
max_inflight_tasks = 4
|
||||
max_files_in_level0 = 8
|
||||
max_purge_tasks = 32
|
||||
|
||||
# Storage manifest options
|
||||
[storage.manifest]
|
||||
# Region checkpoint actions margin.
|
||||
# Create a checkpoint every <checkpoint_margin> actions.
|
||||
checkpoint_margin = 10
|
||||
# Region manifest logs and checkpoints gc execution duration
|
||||
gc_duration = '10m'
|
||||
|
||||
# Storage flush options
|
||||
[storage.flush]
|
||||
# Max inflight flush tasks.
|
||||
max_flush_tasks = 8
|
||||
# Default write buffer size for a region.
|
||||
region_write_buffer_size = "32MB"
|
||||
# Interval to check whether a region needs flush.
|
||||
picker_schedule_interval = "5m"
|
||||
# Interval to auto flush a region if it has not flushed yet.
|
||||
auto_flush_interval = "1h"
|
||||
# Global write buffer size for all regions.
|
||||
global_write_buffer_size = "1GB"
|
||||
# Custom storage options
|
||||
#[[storage.providers]]
|
||||
#type = "S3"
|
||||
#[[storage.providers]]
|
||||
#type = "Gcs"
|
||||
|
||||
# Mito engine options
|
||||
[[region_engine]]
|
||||
@@ -91,27 +88,72 @@ worker_channel_size = 128
|
||||
worker_request_batch_size = 64
|
||||
# Number of meta action updated to trigger a new checkpoint for the manifest
|
||||
manifest_checkpoint_distance = 10
|
||||
# Manifest compression type
|
||||
manifest_compress_type = "Uncompressed"
|
||||
# Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
compress_manifest = false
|
||||
# Max number of running background jobs
|
||||
max_background_jobs = 4
|
||||
# Interval to auto flush a region if it has not flushed yet.
|
||||
auto_flush_interval = "1h"
|
||||
# Global write buffer size for all regions.
|
||||
# Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
|
||||
global_write_buffer_size = "1GB"
|
||||
# Global write buffer size threshold to reject write requests (default 2G).
|
||||
# Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
|
||||
global_write_buffer_reject_size = "2GB"
|
||||
# Cache size for SST metadata (default 128MB). Setting it to 0 to disable the cache.
|
||||
# Cache size for SST metadata. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
|
||||
sst_meta_cache_size = "128MB"
|
||||
# Cache size for vectors and arrow arrays (default 512MB). Setting it to 0 to disable the cache.
|
||||
# Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
vector_cache_size = "512MB"
|
||||
# Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
page_cache_size = "512MB"
|
||||
# Buffer size for SST writing.
|
||||
sst_write_buffer_size = "8MB"
|
||||
# Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
# - 0: using the default value (1/4 of cpu cores).
|
||||
# - 1: scan in current thread.
|
||||
# - n: scan in parallelism n.
|
||||
scan_parallelism = 0
|
||||
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
|
||||
parallel_scan_channel_size = 32
|
||||
# Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
|
||||
[region_engine.mito.inverted_index]
|
||||
# Whether to create the index on flush.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
create_on_flush = "auto"
|
||||
# Whether to create the index on compaction.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
create_on_compaction = "auto"
|
||||
# Whether to apply the index on query
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
apply_on_query = "auto"
|
||||
# Memory threshold for performing an external sort during index creation.
|
||||
# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
mem_threshold_on_create = "64MB"
|
||||
# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
intermediate_path = ""
|
||||
|
||||
# Log options
|
||||
# Log options, see `standalone.example.toml`
|
||||
# [logging]
|
||||
# Specify logs directory.
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# Specify the log level [info | debug | error | warn]
|
||||
# level = "info"
|
||||
|
||||
# Datanode export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# [export_metrics.remote_write]
|
||||
# The url the metrics send to. The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
|
||||
# url = ""
|
||||
# HTTP headers of Prometheus remote-write carry
|
||||
# headers = {}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
# Node running mode, see `standalone.example.toml`.
|
||||
mode = "distributed"
|
||||
# The default timezone of the server
|
||||
# default_timezone = "UTC"
|
||||
|
||||
[heartbeat]
|
||||
# Interval for sending heartbeat task to the Metasrv, 5 seconds by default.
|
||||
@@ -55,6 +57,9 @@ enable = true
|
||||
# Prometheus remote storage options, see `standalone.example.toml`.
|
||||
[prom_store]
|
||||
enable = true
|
||||
# Whether to store the data from Prometheus remote write in metric engine.
|
||||
# true by default
|
||||
with_metric_engine = true
|
||||
|
||||
# Metasrv client options, see `datanode.example.toml`.
|
||||
[meta_client]
|
||||
@@ -64,6 +69,13 @@ timeout = "3s"
|
||||
ddl_timeout = "10s"
|
||||
connect_timeout = "1s"
|
||||
tcp_nodelay = true
|
||||
# The configuration about the cache of the Metadata.
|
||||
# default: 100000
|
||||
metadata_cache_max_capacity = 100000
|
||||
# default: 10m
|
||||
metadata_cache_ttl = "10m"
|
||||
# default: 5m
|
||||
metadata_cache_tti = "5m"
|
||||
|
||||
# Log options, see `standalone.example.toml`
|
||||
# [logging]
|
||||
@@ -77,3 +89,16 @@ tcp_nodelay = true
|
||||
timeout = "10s"
|
||||
connect_timeout = "10s"
|
||||
tcp_nodelay = true
|
||||
|
||||
# Frontend export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# for `frontend`, `self_import` is recommended to collect metrics generated by itself
|
||||
# [export_metrics.self_import]
|
||||
# db = "information_schema"
|
||||
|
||||
@@ -7,14 +7,16 @@ server_addr = "127.0.0.1:3002"
|
||||
# Etcd server address, "127.0.0.1:2379" by default.
|
||||
store_addr = "127.0.0.1:2379"
|
||||
# Datanode selector type.
|
||||
# - "LeaseBased" (default value).
|
||||
# - "LoadBased"
|
||||
# For details, please see "https://docs.greptime.com/developer-guide/meta/selector".
|
||||
selector = "LeaseBased"
|
||||
# - "lease_based" (default value).
|
||||
# - "load_based"
|
||||
# For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
|
||||
selector = "lease_based"
|
||||
# Store data in memory, false by default.
|
||||
use_memory_store = false
|
||||
# Whether to enable greptimedb telemetry, true by default.
|
||||
enable_telemetry = true
|
||||
# If it's not empty, the metasrv will store all data with this key prefix.
|
||||
store_key_prefix = ""
|
||||
|
||||
# Log options, see `standalone.example.toml`
|
||||
# [logging]
|
||||
@@ -42,3 +44,50 @@ first_heartbeat_estimate = "1000ms"
|
||||
# timeout = "10s"
|
||||
# connect_timeout = "10s"
|
||||
# tcp_nodelay = true
|
||||
|
||||
[wal]
|
||||
# Available wal providers:
|
||||
# - "raft_engine" (default)
|
||||
# - "kafka"
|
||||
provider = "raft_engine"
|
||||
|
||||
# There's no raft-engine wal config here since metasrv is only involved in remote wal currently.
|
||||
|
||||
# Kafka wal config.
|
||||
# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default.
|
||||
# broker_endpoints = ["127.0.0.1:9092"]
|
||||
# Number of topics to be created upon start.
|
||||
# num_topics = 64
|
||||
# Topic selector type.
|
||||
# Available selector types:
|
||||
# - "round_robin" (default)
|
||||
# selector_type = "round_robin"
|
||||
# A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
|
||||
# topic_name_prefix = "greptimedb_wal_topic"
|
||||
# Expected number of replicas of each partition.
|
||||
# replication_factor = 1
|
||||
# The timeout above which a topic creation operation will be cancelled.
|
||||
# create_topic_timeout = "30s"
|
||||
# The initial backoff for kafka clients.
|
||||
# backoff_init = "500ms"
|
||||
# The maximum backoff for kafka clients.
|
||||
# backoff_max = "10s"
|
||||
# Exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
# backoff_base = 2
|
||||
# Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
|
||||
# backoff_deadline = "5mins"
|
||||
|
||||
# Metasrv export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# [export_metrics.remote_write]
|
||||
# The url the metrics send to. The url is empty by default, url example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
|
||||
# url = ""
|
||||
# HTTP headers of Prometheus remote-write carry
|
||||
# headers = {}
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
mode = "standalone"
|
||||
# Whether to enable greptimedb telemetry, true by default.
|
||||
enable_telemetry = true
|
||||
# The default timezone of the server
|
||||
# default_timezone = "UTC"
|
||||
|
||||
# HTTP server options.
|
||||
[http]
|
||||
@@ -79,9 +81,17 @@ enable = true
|
||||
[prom_store]
|
||||
# Whether to enable Prometheus remote write and read in HTTP API, true by default.
|
||||
enable = true
|
||||
# Whether to store the data from Prometheus remote write in metric engine.
|
||||
# true by default
|
||||
with_metric_engine = true
|
||||
|
||||
# WAL options.
|
||||
[wal]
|
||||
# Available wal providers:
|
||||
# - "raft_engine" (default)
|
||||
# - "kafka"
|
||||
provider = "raft_engine"
|
||||
|
||||
# Raft-engine wal options.
|
||||
# WAL data directory
|
||||
# dir = "/tmp/greptimedb/wal"
|
||||
# WAL file size in bytes.
|
||||
@@ -94,6 +104,47 @@ purge_interval = "10m"
|
||||
read_batch_size = 128
|
||||
# Whether to sync log file after every write.
|
||||
sync_write = false
|
||||
# Whether to reuse logically truncated log files.
|
||||
enable_log_recycle = true
|
||||
# Whether to pre-create log files on start up
|
||||
prefill_log_files = false
|
||||
# Duration for fsyncing log files.
|
||||
sync_period = "1000ms"
|
||||
|
||||
# Kafka wal options.
|
||||
# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default.
|
||||
# broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
# Number of topics to be created upon start.
|
||||
# num_topics = 64
|
||||
# Topic selector type.
|
||||
# Available selector types:
|
||||
# - "round_robin" (default)
|
||||
# selector_type = "round_robin"
|
||||
# The prefix of topic name.
|
||||
# topic_name_prefix = "greptimedb_wal_topic"
|
||||
# The number of replicas of each partition.
|
||||
# Warning: the replication factor must be positive and must not be greater than the number of broker endpoints.
|
||||
# replication_factor = 1
|
||||
|
||||
# The max size of a single producer batch.
|
||||
# Warning: Kafka has a default limit of 1MB per message in a topic.
|
||||
# max_batch_size = "1MB"
|
||||
# The linger duration.
|
||||
# linger = "200ms"
|
||||
# The consumer wait timeout.
|
||||
# consumer_wait_timeout = "100ms"
|
||||
# Create topic timeout.
|
||||
# create_topic_timeout = "30s"
|
||||
|
||||
# The initial backoff delay.
|
||||
# backoff_init = "500ms"
|
||||
# The maximum backoff delay.
|
||||
# backoff_max = "10s"
|
||||
# Exponential backoff rate, i.e. next backoff = base * current backoff.
|
||||
# backoff_base = 2
|
||||
# The deadline of retries.
|
||||
# backoff_deadline = "5mins"
|
||||
|
||||
# Metadata storage options.
|
||||
[metadata_store]
|
||||
@@ -122,35 +173,72 @@ type = "File"
|
||||
# The local file cache capacity in bytes.
|
||||
# cache_capacity = "256MB"
|
||||
|
||||
# Compaction options.
|
||||
[storage.compaction]
|
||||
# Max task number that can concurrently run.
|
||||
max_inflight_tasks = 4
|
||||
# Max files in level 0 to trigger compaction.
|
||||
max_files_in_level0 = 8
|
||||
# Max task number for SST purge task after compaction.
|
||||
max_purge_tasks = 32
|
||||
# Custom storage options
|
||||
#[[storage.providers]]
|
||||
#type = "S3"
|
||||
#[[storage.providers]]
|
||||
#type = "Gcs"
|
||||
|
||||
# Storage manifest options
|
||||
[storage.manifest]
|
||||
# Region checkpoint actions margin.
|
||||
# Create a checkpoint every <checkpoint_margin> actions.
|
||||
checkpoint_margin = 10
|
||||
# Region manifest logs and checkpoints gc execution duration
|
||||
gc_duration = '10m'
|
||||
|
||||
# Storage flush options
|
||||
[storage.flush]
|
||||
# Max inflight flush tasks.
|
||||
max_flush_tasks = 8
|
||||
# Default write buffer size for a region.
|
||||
region_write_buffer_size = "32MB"
|
||||
# Interval to check whether a region needs flush.
|
||||
picker_schedule_interval = "5m"
|
||||
# Mito engine options
|
||||
[[region_engine]]
|
||||
[region_engine.mito]
|
||||
# Number of region workers
|
||||
num_workers = 8
|
||||
# Request channel size of each worker
|
||||
worker_channel_size = 128
|
||||
# Max batch size for a worker to handle requests
|
||||
worker_request_batch_size = 64
|
||||
# Number of meta action updated to trigger a new checkpoint for the manifest
|
||||
manifest_checkpoint_distance = 10
|
||||
# Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
compress_manifest = false
|
||||
# Max number of running background jobs
|
||||
max_background_jobs = 4
|
||||
# Interval to auto flush a region if it has not flushed yet.
|
||||
auto_flush_interval = "1h"
|
||||
# Global write buffer size for all regions.
|
||||
# Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
|
||||
global_write_buffer_size = "1GB"
|
||||
# Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
|
||||
global_write_buffer_reject_size = "2GB"
|
||||
# Cache size for SST metadata. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
|
||||
sst_meta_cache_size = "128MB"
|
||||
# Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
vector_cache_size = "512MB"
|
||||
# Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
|
||||
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
|
||||
page_cache_size = "512MB"
|
||||
# Buffer size for SST writing.
|
||||
sst_write_buffer_size = "8MB"
|
||||
# Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
# - 0: using the default value (1/4 of cpu cores).
|
||||
# - 1: scan in current thread.
|
||||
# - n: scan in parallelism n.
|
||||
scan_parallelism = 0
|
||||
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
|
||||
parallel_scan_channel_size = 32
|
||||
# Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
|
||||
[region_engine.mito.inverted_index]
|
||||
# Whether to create the index on flush.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
create_on_flush = "auto"
|
||||
# Whether to create the index on compaction.
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
create_on_compaction = "auto"
|
||||
# Whether to apply the index on query
|
||||
# - "auto": automatically
|
||||
# - "disable": never
|
||||
apply_on_query = "auto"
|
||||
# Memory threshold for performing an external sort during index creation.
|
||||
# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
|
||||
mem_threshold_on_create = "64M"
|
||||
# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
|
||||
intermediate_path = ""
|
||||
|
||||
# Log options
|
||||
# [logging]
|
||||
@@ -158,3 +246,24 @@ global_write_buffer_size = "1GB"
|
||||
# dir = "/tmp/greptimedb/logs"
|
||||
# Specify the log level [info | debug | error | warn]
|
||||
# level = "info"
|
||||
# whether enable tracing, default is false
|
||||
# enable_otlp_tracing = false
|
||||
# tracing exporter endpoint with format `ip:port`, we use grpc oltp as exporter, default endpoint is `localhost:4317`
|
||||
# otlp_endpoint = "localhost:4317"
|
||||
# The percentage of tracing will be sampled and exported. Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1. ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
# tracing_sample_ratio = 1.0
|
||||
# Whether to append logs to stdout. Defaults to true.
|
||||
# append_stdout = true
|
||||
|
||||
# Standalone export the metrics generated by itself
|
||||
# encoded to Prometheus remote-write format
|
||||
# and send to Prometheus remote-write compatible receiver (e.g. send to `greptimedb` itself)
|
||||
# This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
|
||||
# [export_metrics]
|
||||
# whether enable export metrics, default is false
|
||||
# enable = false
|
||||
# The interval of export metrics
|
||||
# write_interval = "30s"
|
||||
# for `standalone`, `self_import` is recommended to collect metrics generated by itself
|
||||
# [export_metrics.self_import]
|
||||
# db = "information_schema"
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
FROM ubuntu:22.04
|
||||
|
||||
# The root path under which contains all the dependencies to build this Dockerfile.
|
||||
ARG DOCKER_BUILD_ROOT=.
|
||||
# The binary name of GreptimeDB executable.
|
||||
# Defaults to "greptime", but sometimes in other projects it might be different.
|
||||
ARG TARGET_BIN=greptime
|
||||
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
ca-certificates \
|
||||
python3.10 \
|
||||
@@ -7,14 +13,16 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
python3-pip \
|
||||
curl
|
||||
|
||||
COPY ./docker/python/requirements.txt /etc/greptime/requirements.txt
|
||||
COPY $DOCKER_BUILD_ROOT/docker/python/requirements.txt /etc/greptime/requirements.txt
|
||||
|
||||
RUN python3 -m pip install -r /etc/greptime/requirements.txt
|
||||
|
||||
ARG TARGETARCH
|
||||
|
||||
ADD $TARGETARCH/greptime /greptime/bin/
|
||||
ADD $TARGETARCH/$TARGET_BIN /greptime/bin/
|
||||
|
||||
ENV PATH /greptime/bin/:$PATH
|
||||
|
||||
ENTRYPOINT ["greptime"]
|
||||
ENV TARGET_BIN=$TARGET_BIN
|
||||
|
||||
ENTRYPOINT ["sh", "-c", "exec $TARGET_BIN \"$@\"", "--"]
|
||||
|
||||
@@ -26,4 +26,5 @@ ARG RUST_TOOLCHAIN
|
||||
RUN rustup toolchain install ${RUST_TOOLCHAIN}
|
||||
|
||||
# Install nextest.
|
||||
RUN cargo install cargo-nextest --locked
|
||||
RUN cargo install cargo-binstall --locked
|
||||
RUN cargo binstall cargo-nextest --no-confirm
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
FROM ubuntu:20.04
|
||||
|
||||
# The root path under which contains all the dependencies to build this Dockerfile.
|
||||
ARG DOCKER_BUILD_ROOT=.
|
||||
|
||||
ENV LANG en_US.utf8
|
||||
WORKDIR /greptimedb
|
||||
|
||||
@@ -27,10 +30,20 @@ RUN apt-get -y purge python3.8 && \
|
||||
ln -s /usr/bin/python3.10 /usr/bin/python3 && \
|
||||
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
|
||||
|
||||
RUN git config --global --add safe.directory /greptimedb
|
||||
# Silence all `safe.directory` warnings, to avoid the "detect dubious repository" error when building with submodules.
|
||||
# Disabling the safe directory check here won't pose extra security issues, because in our usage for this dev build
|
||||
# image, we use it solely on our own environment (that github action's VM, or ECS created dynamically by ourselves),
|
||||
# and the repositories are pulled from trusted sources (still us, of course). Doing so does not violate the intention
|
||||
# of the Git's addition to the "safe.directory" at the first place (see the commit message here:
|
||||
# https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9).
|
||||
# There's also another solution to this, that we add the desired submodules to the safe directory, instead of using
|
||||
# a wildcard here. However, that requires the git config files and the submodules to all be owned by the very same user.
|
||||
# It's troublesome to do this since the dev build runs in Docker, which is under user "root"; while outside the Docker,
|
||||
# it can be a different user that has prepared the submodules.
|
||||
RUN git config --global --add safe.directory *
|
||||
|
||||
# Install Python dependencies.
|
||||
COPY ./docker/python/requirements.txt /etc/greptime/requirements.txt
|
||||
COPY $DOCKER_BUILD_ROOT/docker/python/requirements.txt /etc/greptime/requirements.txt
|
||||
RUN python3 -m pip install -r /etc/greptime/requirements.txt
|
||||
|
||||
# Install Rust.
|
||||
@@ -43,4 +56,5 @@ ARG RUST_TOOLCHAIN
|
||||
RUN rustup toolchain install ${RUST_TOOLCHAIN}
|
||||
|
||||
# Install nextest.
|
||||
RUN cargo install cargo-nextest --locked
|
||||
RUN cargo install cargo-binstall --locked
|
||||
RUN cargo binstall cargo-nextest --no-confirm
|
||||
|
||||
docker/dev-builder/ubuntu/Dockerfile-18.10 (new file, 48 lines)
@@ -0,0 +1,48 @@
|
||||
# Use the legacy glibc 2.28.
|
||||
FROM ubuntu:18.10
|
||||
|
||||
ENV LANG en_US.utf8
|
||||
WORKDIR /greptimedb
|
||||
|
||||
# Use old-releases.ubuntu.com to avoid 404s: https://help.ubuntu.com/community/EOLUpgrades.
|
||||
RUN echo "deb http://old-releases.ubuntu.com/ubuntu/ cosmic main restricted universe multiverse\n\
|
||||
deb http://old-releases.ubuntu.com/ubuntu/ cosmic-updates main restricted universe multiverse\n\
|
||||
deb http://old-releases.ubuntu.com/ubuntu/ cosmic-security main restricted universe multiverse" > /etc/apt/sources.list
|
||||
|
||||
# Install dependencies.
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
libssl-dev \
|
||||
tzdata \
|
||||
curl \
|
||||
ca-certificates \
|
||||
git \
|
||||
build-essential \
|
||||
unzip \
|
||||
pkg-config
|
||||
|
||||
# Install protoc.
|
||||
ENV PROTOC_VERSION=25.1
|
||||
RUN if [ "$(uname -m)" = "x86_64" ]; then \
|
||||
PROTOC_ZIP=protoc-${PROTOC_VERSION}-linux-x86_64.zip; \
|
||||
elif [ "$(uname -m)" = "aarch64" ]; then \
|
||||
PROTOC_ZIP=protoc-${PROTOC_VERSION}-linux-aarch_64.zip; \
|
||||
else \
|
||||
echo "Unsupported architecture"; exit 1; \
|
||||
fi && \
|
||||
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP} && \
|
||||
unzip -o ${PROTOC_ZIP} -d /usr/local bin/protoc && \
|
||||
unzip -o ${PROTOC_ZIP} -d /usr/local 'include/*' && \
|
||||
rm -f ${PROTOC_ZIP}
|
||||
|
||||
# Install Rust.
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
|
||||
ENV PATH /root/.cargo/bin/:$PATH
|
||||
|
||||
# Install Rust toolchains.
|
||||
ARG RUST_TOOLCHAIN
|
||||
RUN rustup toolchain install ${RUST_TOOLCHAIN}
|
||||
|
||||
# Install nextest.
|
||||
RUN cargo install cargo-binstall --locked
|
||||
RUN cargo binstall cargo-nextest --no-confirm
|
||||
@@ -50,10 +50,10 @@ The concept "Table" in GreptimeDB is a bit "heavy" compared to other time-series
|
||||
```
|
||||
|
||||
The following parts will describe these implementation details:
|
||||
- How to route these metric region tables and how those table are distributed
|
||||
- How to maintain the schema and other metadata of the underlying mito engine table
|
||||
- How to maintain the schema of metric engine table
|
||||
- How the query goes
|
||||
- How to route these metric region tables and how those table are distributed
|
||||
- How to maintain the schema and other metadata of the underlying mito engine table
|
||||
- How to maintain the schema of metric engine table
|
||||
- How the query goes
|
||||
|
||||
## Routing
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
Feature Name: Inverted Index for SST File
|
||||
Tracking Issue: TBD
|
||||
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/2705
|
||||
Date: 2023-11-03
|
||||
Author: "Zhong Zhenchi <zhongzc_arch@outlook.com>"
|
||||
---
|
||||
|
||||
docs/rfcs/2023-12-22-enclose-column-id.md (new file, 44 lines)
@@ -0,0 +1,44 @@
|
||||
---
|
||||
Feature Name: Enclose Column Id
|
||||
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/2982
|
||||
Date: 2023-12-22
|
||||
Author: "Ruihang Xia <waynestxia@gmail.com>"
|
||||
---
|
||||
|
||||
# Summary
|
||||
This RFC proposes to enclose the usage of `ColumnId` into the region engine only.
|
||||
|
||||
# Motivation
|
||||
`ColumnId` is an identifier for columns. It's assigned by meta server, stored in `TableInfo` and `RegionMetadata` and used in region engine to distinguish columns.
|
||||
|
||||
At present, Frontend, Datanode, and Metasrv are all aware of `ColumnId`, but it's only used in the region engine. Thus this RFC proposes to remove it from Frontend (where it's mainly used in `TableInfo`) and Metasrv.
|
||||
|
||||
# Details
|
||||
|
||||
`ColumnId` is used widely on both read and write paths. Removing it from Frontend and Metasrv implies several things:
|
||||
|
||||
- A column may have different column id in different regions.
|
||||
- A column is identified by its name in all components.
|
||||
- Column order in the region engine is not restricted, i.e., it doesn't need to be in the same order as the table info.
|
||||
|
||||
The first thing doesn't matter IMO. This concept doesn't exist anymore outside of the region server, and each region is autonomous and independent -- the only guarantee it should hold is that those columns exist. But if we consider region repartition, where an SST file would be re-assigned to different regions, things become a bit more complicated. A possible solution is to store the relation between name and ColumnId in the manifest, but that's out of the scope of this RFC. We can likely provide a workaround by introducing an indirection mapping layer across different versions of partitions.
|
||||
|
||||
And more importantly, we can still assume columns have the same column ids across regions. We have procedures to maintain consistency between regions, and the region engine should ensure alterations are idempotent. So it is possible that region repartition won't need to consider column ids or other region metadata in the future.
|
||||
|
||||
Users write and query columns by their names, not by `ColumnId` or anything else. The second point also means changing the column reference in `ScanRequest` from index to name, as sketched below. This change can hugely alleviate the misuse of column indexes, which has given us many surprises.
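To make the second point concrete, here is a minimal, hypothetical sketch of what a name-based scan request could look like. The `ScanRequest` struct and its fields are simplified stand-ins for illustration only, not the actual `store-api` definitions.

```rust
// Hypothetical sketch: simplified stand-ins to illustrate referencing columns
// by name instead of by index or ColumnId. Not the real store-api types.
#[derive(Debug, Default)]
struct ScanRequest {
    /// Columns to return, referenced by name.
    projection: Option<Vec<String>>,
    /// Simple filter expressions, also referring to columns by name.
    filters: Vec<String>,
}

fn main() {
    // A scan that projects two columns and filters on one of them,
    // without the caller ever knowing the region-local ColumnId.
    let request = ScanRequest {
        projection: Some(vec!["host".to_string(), "cpu_util".to_string()]),
        filters: vec!["host = 'web-01'".to_string()],
    };
    println!("{request:?}");
}
```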
|
||||
|
||||
As for the last one, column order only matters in the table info. This order is used in user-facing table structure operations, like adding a column, describing a column, or as the default column order of an INSERT clause. None of them is connected with the order in storage.
|
||||
|
||||
# Drawback
|
||||
Firstly, this is a breaking change. Delivering this change requires a full upgrade of the cluster. Secondly, this change may introduce some performance regression. For example, we have to pass the full table name in the `ScanRequest` instead of the `ColumnId`. But this influence is very limited, since the column index is only used in the region engine.
|
||||
|
||||
# Alternatives
|
||||
|
||||
There are two alternatives from the perspective of "what can be used as the column identifier":
|
||||
|
||||
- Index of column to the table schema
|
||||
- `ColumnId` of that column
|
||||
|
||||
The first one is what we are using now. Choosing this way requires keeping the column order in the region engine the same as in the table info. This is not hard to achieve, but it's a bit annoying. And things become tricky when there are internal columns or different schemas, like those stored in the file format. This is the initial purpose of this RFC: to decouple the table schema and the region schema.
|
||||
|
||||
The second one, on the other hand, requires that the `ColumnId` be identical in all regions and in `TableInfo`. It has the same drawback as the previous alternative: the `TableInfo` and `RegionMetadata` are tied together. Another point is that the `ColumnId` is assigned by the Metasrv, which doesn't need it but has to maintain it. This also limits the functionality of `ColumnId`, by taking the ability to assign it away from the concrete region engine.
|
||||
docs/rfcs/2024-01-17-dataflow-framework.md (new file, 97 lines)
@@ -0,0 +1,97 @@
|
||||
---
|
||||
Feature Name: Dataflow Framework
|
||||
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/3187
|
||||
Date: 2024-01-17
|
||||
Author: "Discord9 <discord9@163.com>"
|
||||
---
|
||||
|
||||
# Summary
|
||||
This RFC proposes a lightweight module for executing continuous aggregation queries on a stream of data.
|
||||
|
||||
# Motivation
|
||||
Being able to do continuous aggregation is a very powerful tool. It allows you to do things like:
|
||||
1. downsample data, e.g., from 1 millisecond to 1 second resolution
|
||||
2. calculate the average of a stream of data
|
||||
3. keep a sliding window of data in memory
|
||||
In order to do those things while maintaining a low memory footprint, you need to be able to manage the data in a smart way. Hence, we only store the necessary data in memory, and send/receive data deltas to/from the client.
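As a rough illustration of this delta-based approach (a sketch under assumed semantics, not the actual Flow implementation), a streaming average only needs to keep a running sum and count in memory and can emit the updated value after each incoming batch:

```rust
// Minimal sketch: keep only the aggregate state needed for a streaming
// average and emit the updated result per batch, instead of re-reading
// all historical rows. Names are illustrative, not the real Flow code.
#[derive(Default)]
struct AvgState {
    sum: f64,
    count: u64,
}

impl AvgState {
    /// Apply a batch of new values and return the refreshed average.
    fn apply(&mut self, values: &[f64]) -> f64 {
        self.sum += values.iter().sum::<f64>();
        self.count += values.len() as u64;
        self.sum / self.count as f64
    }
}

fn main() {
    let mut state = AvgState::default();
    println!("avg = {}", state.apply(&[1.0, 2.0, 3.0])); // 2.0
    println!("avg = {}", state.apply(&[10.0]));          // 4.0
}
```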
|
||||
|
||||
# Details
|
||||
|
||||
## System boundary / What it's and isn't
|
||||
- GreptimeFlow provides a way to perform continuous aggregation over time-series data.
|
||||
- It's not a complete stream-processing system. Only a necessary subset of functionalities is provided.
|
||||
- Flow can process a configured range of fresh data. Data exceeding this range will be dropped directly. Thus it cannot handle random datasets (random on timestamp).
|
||||
- Both sliding windows (e.g., the latest 5m from the present) and fixed windows (every 5m from some fixed time) are supported. These two are the major target scenarios.
|
||||
- Flow can handle most aggregate operators within one table (i.e., sum, avg, min, max, and comparison operators). But others (join, trigger, txn, etc.) are not target features.
|
||||
|
||||
## Framework
|
||||
- Greptime Flow is built on top of [Hydroflow](https://github.com/hydro-project/hydroflow).
|
||||
- We had three choices for the dataflow/stream-processing framework behind our simple continuous aggregation feature:
|
||||
1. Based on the timely/differential dataflow crates that [materialize](https://github.com/MaterializeInc/materialize) is built on. This later proved too obscure for our simple usage, and it's hard to customize memory usage control.
|
||||
2. Based on a simple dataflow framework that we write from the ground up, like what [arroyo](https://www.arroyo.dev/) or [risingwave](https://www.risingwave.dev/) did; for example, the core streaming logic of [arroyo](https://github.com/ArroyoSystems/arroyo/blob/master/arroyo-datastream/src/lib.rs) only takes about 2000 lines of code. However, it means maintaining another dataflow framework layer, which might seem easy in the beginning, but I fear it might become too burdensome to maintain once we need more features.
|
||||
3. Based on a simple, lower-level dataflow framework written by someone else, like [hydroflow](https://github.com/hydro-project/hydroflow). This approach combines the best of both worlds. Firstly, it boasts ease of comprehension and customization. Secondly, the dataflow framework offers precisely the necessary features for crafting uncomplicated single-node dataflow programs while delivering decent performance.
|
||||
|
||||
Hence, we choose the third option, and use a simple logical plan that's agnostic to the underlying dataflow framework, as it only describes what the dataflow graph should be doing, not how it does it. We build operators in Hydroflow to execute the plan, and the resulting Hydroflow graph is wrapped in an engine that only supports data in/out and a tick event to flush and compute the result. This provides a thin middle layer that's easy to maintain and allows switching to another dataflow framework if necessary; a rough sketch of such a wrapper is shown below.
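The sketch below shows what such a thin, framework-agnostic wrapper could look like. The `FlowEngine` trait, the `Row` alias, and `CountingEngine` are hypothetical names for illustration only; they are not the real Flow or Hydroflow API.

```rust
// Hypothetical sketch of the "thin middle layer": a framework-agnostic engine
// whose only surface is data in/out plus a tick that flushes results.
type Row = Vec<String>;

trait FlowEngine {
    /// Feed a batch of mirrored insert rows into the dataflow graph.
    fn push(&mut self, rows: Vec<Row>);
    /// Advance time: run the wrapped dataflow and drain the new results.
    fn tick(&mut self) -> Vec<Row>;
}

/// A toy engine that just counts rows per tick; a real implementation would
/// wrap a compiled dataflow graph built from the logical plan.
struct CountingEngine {
    pending: usize,
}

impl FlowEngine for CountingEngine {
    fn push(&mut self, rows: Vec<Row>) {
        self.pending += rows.len();
    }
    fn tick(&mut self) -> Vec<Row> {
        let out = vec![vec![self.pending.to_string()]];
        self.pending = 0;
        out
    }
}

fn main() {
    let mut engine = CountingEngine { pending: 0 };
    engine.push(vec![vec!["a".into()], vec!["b".into()]]);
    println!("{:?}", engine.tick()); // [["2"]]
}
```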
|
||||
|
||||
## Deploy mode and protocol
|
||||
- Greptime Flow is an independent streaming compute component. It can be used either within a standalone node or as a dedicated node at the same level as frontend in distributed mode.
|
||||
- It accepts insert requests as `Rows`, the same format used between frontend and datanode.
|
||||
- A new flow job is submitted as a modified SQL query, similar to what Snowflake does, e.g.: `CREATE TASK avg_over_5m WINDOW_SIZE = "5m" AS SELECT avg(value) FROM table WHERE time > now() - 5m GROUP BY time(1m)`. The flow job is then stored in Metasrv.
|
||||
- It also persists results to the frontend in the `Rows` format.
|
||||
- The query plan uses Substrait as the codec format, the same as GreptimeDB's query engine.
|
||||
- Greptime Flow needs a WAL for recovery. It's possible to reuse the datanode's.
|
||||
|
||||
The workflow is shown in the following diagram:
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph Flownode["Flownode"]
|
||||
subgraph Dataflows
|
||||
df1("Dataflow_1")
|
||||
df2("Dataflow_2")
|
||||
end
|
||||
end
|
||||
subgraph Frontend["Frontend"]
|
||||
newLines["Mirror Insert
|
||||
Create Task From Query
|
||||
Write result from flow node"]
|
||||
end
|
||||
|
||||
subgraph Datanode["Datanode"]
|
||||
end
|
||||
|
||||
User --> Frontend
|
||||
Frontend -->|Register Task| Metasrv
|
||||
Metasrv -->|Read Task Metadata| Frontend
|
||||
Frontend -->|Create Task| Flownode
|
||||
|
||||
Frontend -->|Mirror Insert| Flownode
|
||||
Flownode -->|Write back| Frontend
|
||||
|
||||
Frontend --> Datanode
|
||||
Datanode --> Frontend
|
||||
|
||||
```
|
||||
|
||||
## Lifecycle of data
|
||||
- New data is inserted into the frontend as before. The frontend will mirror insert requests to the flow node if there is a configured flow job.
|
||||
- Depending on the timestamp of incoming data, flow will either drop it (outdated data) or process it (fresh data).
|
||||
- Greptime Flow will periodically write results back to the result table through the frontend.
|
||||
- Those results will then be written into a result table stored in the datanode.
|
||||
- A small table of intermediate state is kept in memory, which is used to calculate the result (see the sketch below).
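The sketch below illustrates the assumed behavior (illustrative only, not the actual Flow code): rows older than the materialize window are dropped on arrival, while fresh rows update a small per-window intermediate state that is garbage-collected as the window slides forward.

```rust
// Illustrative sketch of the freshness check and in-memory intermediate
// state. All names and the window logic are assumptions for explanation.
use std::collections::BTreeMap;

/// Intermediate state: per-window-start sum of values, kept only for windows
/// that are still inside the materialize window.
fn ingest(
    state: &mut BTreeMap<i64, f64>,
    now: i64,
    materialize_window: i64,
    window_size: i64,
    ts: i64,
    value: f64,
) {
    if ts < now - materialize_window {
        // Outdated data point: dropped directly, never touches the state.
        return;
    }
    let window_start = ts - ts.rem_euclid(window_size);
    *state.entry(window_start).or_insert(0.0) += value;
    // Garbage-collect windows that slid out of the materialize window.
    state.retain(|start, _| *start >= now - materialize_window);
}

fn main() {
    let mut state = BTreeMap::new();
    ingest(&mut state, 1_000, 300, 60, 990, 1.5); // fresh: kept
    ingest(&mut state, 1_000, 300, 60, 500, 9.9); // too old: dropped
    println!("{state:?}"); // {960: 1.5}
}
```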
|
||||
## Supported operations
|
||||
- Greptime Flow accepts a configurable "materialize window"; data points exceeding that time window are discarded.
|
||||
- Data within that "materialize window" is queryable and updateable.
|
||||
- Greptime Flow can handle partitioning if and only if the input query can be transformed into a fully partitioned plan according to the existing commutative rules. Otherwise, the corresponding flow job has to be calculated on a single node.
|
||||
- Notice that Greptime Flow has to see all the data belonging to one partition.
- Deletion and duplicate insertion are not supported at this early stage.
## Miscellaneous
- Greptime Flow can translate SQL into its own plan; however, only a select few aggregate functions are supported for now, such as min/max/sum/count/avg.
- Greptime Flow's operators are configurable in terms of the size of the materialize window, whether to tolerate delayed incoming data, etc., so the simplest operators can choose not to tolerate any delay in order to save memory. A possible configuration shape is sketched below.
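
A hypothetical shape for such per-operator configuration, with the materialize window and delay tolerance as fields, could look like the following. The names `OperatorConfig`, `materialize_window`, and `allowed_delay` are assumptions for illustration rather than the actual options.

```rust
use std::time::Duration;

// Hypothetical per-operator configuration; field names are illustrative.
struct OperatorConfig {
    // Data points older than `now - materialize_window - allowed_delay` are discarded.
    materialize_window: Duration,
    // How much lateness to tolerate; zero means no extra state is kept for late data.
    allowed_delay: Duration,
}

impl Default for OperatorConfig {
    fn default() -> Self {
        Self {
            materialize_window: Duration::from_secs(5 * 60),
            allowed_delay: Duration::ZERO,
        }
    }
}

// Decide whether an incoming data point is fresh enough to process.
fn accept(config: &OperatorConfig, now_ms: i64, data_ts_ms: i64) -> bool {
    let window_ms = config.materialize_window.as_millis() as i64;
    let delay_ms = config.allowed_delay.as_millis() as i64;
    data_ts_ms >= now_ms - window_ms - delay_ms
}
```

With `allowed_delay` set to zero, an operator can finalize each window as soon as it closes and keep no extra state for late data.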
# Future Work
- Support UDFs that do one-to-one mapping. Preferably, we can reuse the UDF mechanism in GreptimeDB.
- Support join operator.
- Design syntax for configuring operators with different materialize windows and delay tolerances.
- Support a cross-partition merge operator, so that complex query plans that do not necessarily accord with the partitioning rules can communicate between nodes and create the final materialized result.
- Duplicate insertion, which can be reverted easily within the current framework, so supporting it should be easy.
- Deletion within the "materialize window"; this requires operators like min/max to store all inputs within the materialize window, which might require further optimization.
10
grafana/README.md
Normal file
@@ -0,0 +1,10 @@
Grafana dashboard for GreptimeDB
--------------------------------
GreptimeDB's official Grafana dashboard.

Status note: we are still working on this config. It is expected to change frequently in the coming days. Please feel free to submit your feedback and/or contributions to this dashboard 🤗
# How to use
Open the Grafana dashboard page, choose `New` -> `Import`, and upload the `greptimedb.json` file.
2399
grafana/greptimedb.json
Normal file
File diff suppressed because it is too large
@@ -1,2 +1,2 @@
|
||||
[toolchain]
|
||||
channel = "nightly-2023-08-07"
|
||||
channel = "nightly-2023-12-19"
|
||||
|
||||
@@ -6,11 +6,13 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
common-base.workspace = true
|
||||
common-decimal.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-time.workspace = true
|
||||
datatypes.workspace = true
|
||||
greptime-proto.workspace = true
|
||||
paste = "1.0"
|
||||
prost.workspace = true
|
||||
snafu.workspace = true
|
||||
tonic.workspace = true
|
||||
|
||||
@@ -15,6 +15,8 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_base::BitVec;
|
||||
use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION};
|
||||
use common_decimal::Decimal128;
|
||||
use common_time::interval::IntervalUnit;
|
||||
use common_time::time::Time;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
@@ -26,47 +28,71 @@ use datatypes::types::{
|
||||
};
|
||||
use datatypes::value::{OrderedF32, OrderedF64, Value};
|
||||
use datatypes::vectors::{
|
||||
BinaryVector, BooleanVector, DateTimeVector, DateVector, DurationMicrosecondVector,
|
||||
DurationMillisecondVector, DurationNanosecondVector, DurationSecondVector, Float32Vector,
|
||||
Float64Vector, Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
|
||||
IntervalYearMonthVector, PrimitiveVector, StringVector, TimeMicrosecondVector,
|
||||
TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
|
||||
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt32Vector,
|
||||
UInt64Vector, VectorRef,
|
||||
BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector,
|
||||
DurationMicrosecondVector, DurationMillisecondVector, DurationNanosecondVector,
|
||||
DurationSecondVector, Float32Vector, Float64Vector, Int32Vector, Int64Vector,
|
||||
IntervalDayTimeVector, IntervalMonthDayNanoVector, IntervalYearMonthVector, PrimitiveVector,
|
||||
StringVector, TimeMicrosecondVector, TimeMillisecondVector, TimeNanosecondVector,
|
||||
TimeSecondVector, TimestampMicrosecondVector, TimestampMillisecondVector,
|
||||
TimestampNanosecondVector, TimestampSecondVector, UInt32Vector, UInt64Vector, VectorRef,
|
||||
};
|
||||
use greptime_proto::v1;
|
||||
use greptime_proto::v1::column_data_type_extension::TypeExt;
|
||||
use greptime_proto::v1::ddl_request::Expr;
|
||||
use greptime_proto::v1::greptime_request::Request;
|
||||
use greptime_proto::v1::query_request::Query;
|
||||
use greptime_proto::v1::value::ValueData;
|
||||
use greptime_proto::v1::{self, DdlRequest, IntervalMonthDayNano, QueryRequest, Row, SemanticType};
|
||||
use greptime_proto::v1::{
|
||||
ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, QueryRequest, Row, SemanticType,
|
||||
};
|
||||
use paste::paste;
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::v1::column::Values;
|
||||
use crate::v1::{Column, ColumnDataType, Value as GrpcValue};
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct ColumnDataTypeWrapper(ColumnDataType);
|
||||
/// ColumnDataTypeWrapper is a wrapper of ColumnDataType and ColumnDataTypeExtension.
|
||||
/// It could be used to convert with ConcreteDataType.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct ColumnDataTypeWrapper {
|
||||
datatype: ColumnDataType,
|
||||
datatype_ext: Option<ColumnDataTypeExtension>,
|
||||
}
|
||||
|
||||
impl ColumnDataTypeWrapper {
|
||||
pub fn try_new(datatype: i32) -> Result<Self> {
|
||||
/// Try to create a ColumnDataTypeWrapper from i32(ColumnDataType) and ColumnDataTypeExtension.
|
||||
pub fn try_new(datatype: i32, datatype_ext: Option<ColumnDataTypeExtension>) -> Result<Self> {
|
||||
let datatype = ColumnDataType::try_from(datatype)
|
||||
.context(error::UnknownColumnDataTypeSnafu { datatype })?;
|
||||
Ok(Self(datatype))
|
||||
Ok(Self {
|
||||
datatype,
|
||||
datatype_ext,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn new(datatype: ColumnDataType) -> Self {
|
||||
Self(datatype)
|
||||
/// Create a ColumnDataTypeWrapper from ColumnDataType and ColumnDataTypeExtension.
|
||||
pub fn new(datatype: ColumnDataType, datatype_ext: Option<ColumnDataTypeExtension>) -> Self {
|
||||
Self {
|
||||
datatype,
|
||||
datatype_ext,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the ColumnDataType.
|
||||
pub fn datatype(&self) -> ColumnDataType {
|
||||
self.0
|
||||
self.datatype
|
||||
}
|
||||
|
||||
/// Get a tuple of ColumnDataType and ColumnDataTypeExtension.
|
||||
pub fn to_parts(&self) -> (ColumnDataType, Option<ColumnDataTypeExtension>) {
|
||||
(self.datatype, self.datatype_ext.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ColumnDataTypeWrapper> for ConcreteDataType {
|
||||
fn from(datatype: ColumnDataTypeWrapper) -> Self {
|
||||
match datatype.0 {
|
||||
fn from(datatype_wrapper: ColumnDataTypeWrapper) -> Self {
|
||||
match datatype_wrapper.datatype {
|
||||
ColumnDataType::Boolean => ConcreteDataType::boolean_datatype(),
|
||||
ColumnDataType::Int8 => ConcreteDataType::int8_datatype(),
|
||||
ColumnDataType::Int16 => ConcreteDataType::int16_datatype(),
|
||||
@@ -109,6 +135,100 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
|
||||
ConcreteDataType::duration_microsecond_datatype()
|
||||
}
|
||||
ColumnDataType::DurationNanosecond => ConcreteDataType::duration_nanosecond_datatype(),
|
||||
ColumnDataType::Decimal128 => {
|
||||
if let Some(TypeExt::DecimalType(d)) = datatype_wrapper
|
||||
.datatype_ext
|
||||
.as_ref()
|
||||
.and_then(|datatype_ext| datatype_ext.type_ext.as_ref())
|
||||
{
|
||||
ConcreteDataType::decimal128_datatype(d.precision as u8, d.scale as i8)
|
||||
} else {
|
||||
ConcreteDataType::decimal128_default_datatype()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This macro is used to generate datatype functions
|
||||
/// with lower style for ColumnDataTypeWrapper.
|
||||
///
|
||||
///
|
||||
/// For example: we can use `ColumnDataTypeWrapper::int8_datatype()`,
|
||||
/// to get a ColumnDataTypeWrapper with datatype `ColumnDataType::Int8`.
|
||||
macro_rules! impl_column_type_functions {
|
||||
($($Type: ident), +) => {
|
||||
paste! {
|
||||
impl ColumnDataTypeWrapper {
|
||||
$(
|
||||
pub fn [<$Type:lower _datatype>]() -> ColumnDataTypeWrapper {
|
||||
ColumnDataTypeWrapper {
|
||||
datatype: ColumnDataType::$Type,
|
||||
datatype_ext: None,
|
||||
}
|
||||
}
|
||||
)+
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This macro is used to generate datatype functions
|
||||
/// with snake style for ColumnDataTypeWrapper.
|
||||
///
|
||||
///
|
||||
/// For example: we can use `ColumnDataTypeWrapper::duration_second_datatype()`,
|
||||
/// to get a ColumnDataTypeWrapper with datatype `ColumnDataType::DurationSecond`.
|
||||
macro_rules! impl_column_type_functions_with_snake {
|
||||
($($TypeName: ident), +) => {
|
||||
paste!{
|
||||
impl ColumnDataTypeWrapper {
|
||||
$(
|
||||
pub fn [<$TypeName:snake _datatype>]() -> ColumnDataTypeWrapper {
|
||||
ColumnDataTypeWrapper {
|
||||
datatype: ColumnDataType::$TypeName,
|
||||
datatype_ext: None,
|
||||
}
|
||||
}
|
||||
)+
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_column_type_functions!(
|
||||
Boolean, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Float32, Float64, Binary,
|
||||
Date, Datetime, String
|
||||
);
|
||||
|
||||
impl_column_type_functions_with_snake!(
|
||||
TimestampSecond,
|
||||
TimestampMillisecond,
|
||||
TimestampMicrosecond,
|
||||
TimestampNanosecond,
|
||||
TimeSecond,
|
||||
TimeMillisecond,
|
||||
TimeMicrosecond,
|
||||
TimeNanosecond,
|
||||
IntervalYearMonth,
|
||||
IntervalDayTime,
|
||||
IntervalMonthDayNano,
|
||||
DurationSecond,
|
||||
DurationMillisecond,
|
||||
DurationMicrosecond,
|
||||
DurationNanosecond
|
||||
);
|
||||
|
||||
impl ColumnDataTypeWrapper {
|
||||
pub fn decimal128_datatype(precision: i32, scale: i32) -> Self {
|
||||
ColumnDataTypeWrapper {
|
||||
datatype: ColumnDataType::Decimal128,
|
||||
datatype_ext: Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::DecimalType(DecimalTypeExtension {
|
||||
precision,
|
||||
scale,
|
||||
})),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -117,7 +237,7 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
|
||||
type Error = error::Error;
|
||||
|
||||
fn try_from(datatype: ConcreteDataType) -> Result<Self> {
|
||||
let datatype = ColumnDataTypeWrapper(match datatype {
|
||||
let column_datatype = match datatype {
|
||||
ConcreteDataType::Boolean(_) => ColumnDataType::Boolean,
|
||||
ConcreteDataType::Int8(_) => ColumnDataType::Int8,
|
||||
ConcreteDataType::Int16(_) => ColumnDataType::Int16,
|
||||
@@ -156,13 +276,30 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
|
||||
DurationType::Microsecond(_) => ColumnDataType::DurationMicrosecond,
|
||||
DurationType::Nanosecond(_) => ColumnDataType::DurationNanosecond,
|
||||
},
|
||||
ConcreteDataType::Decimal128(_) => ColumnDataType::Decimal128,
|
||||
ConcreteDataType::Null(_)
|
||||
| ConcreteDataType::List(_)
|
||||
| ConcreteDataType::Dictionary(_) => {
|
||||
return error::IntoColumnDataTypeSnafu { from: datatype }.fail()
|
||||
}
|
||||
});
|
||||
Ok(datatype)
|
||||
};
|
||||
let datatype_extension = match column_datatype {
|
||||
ColumnDataType::Decimal128 => {
|
||||
datatype
|
||||
.as_decimal128()
|
||||
.map(|decimal_type| ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::DecimalType(DecimalTypeExtension {
|
||||
precision: decimal_type.precision() as i32,
|
||||
scale: decimal_type.scale() as i32,
|
||||
})),
|
||||
})
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
Ok(Self {
|
||||
datatype: column_datatype,
|
||||
datatype_ext: datatype_extension,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -288,6 +425,10 @@ pub fn values_with_capacity(datatype: ColumnDataType, capacity: usize) -> Values
|
||||
duration_nanosecond_values: Vec::with_capacity(capacity),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDataType::Decimal128 => Values {
|
||||
decimal128_values: Vec::with_capacity(capacity),
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -341,6 +482,7 @@ pub fn push_vals(column: &mut Column, origin_count: usize, vector: VectorRef) {
|
||||
TimeUnit::Microsecond => values.duration_microsecond_values.push(val.value()),
|
||||
TimeUnit::Nanosecond => values.duration_nanosecond_values.push(val.value()),
|
||||
},
|
||||
Value::Decimal128(val) => values.decimal128_values.push(convert_to_pb_decimal128(val)),
|
||||
Value::List(_) => unreachable!(),
|
||||
});
|
||||
column.null_mask = null_mask.into_vec();
|
||||
@@ -381,17 +523,26 @@ fn ddl_request_type(request: &DdlRequest) -> &'static str {
|
||||
}
|
||||
|
||||
/// Converts an i128 value to google protobuf type [IntervalMonthDayNano].
|
||||
pub fn convert_i128_to_interval(v: i128) -> IntervalMonthDayNano {
|
||||
pub fn convert_i128_to_interval(v: i128) -> v1::IntervalMonthDayNano {
|
||||
let interval = Interval::from_i128(v);
|
||||
let (months, days, nanoseconds) = interval.to_month_day_nano();
|
||||
IntervalMonthDayNano {
|
||||
v1::IntervalMonthDayNano {
|
||||
months,
|
||||
days,
|
||||
nanoseconds,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pb_value_to_value_ref(value: &v1::Value) -> ValueRef {
|
||||
/// Convert common decimal128 to grpc decimal128 without precision and scale.
|
||||
pub fn convert_to_pb_decimal128(v: Decimal128) -> v1::Decimal128 {
|
||||
let (hi, lo) = v.split_value();
|
||||
v1::Decimal128 { hi, lo }
|
||||
}
|
||||
|
||||
pub fn pb_value_to_value_ref<'a>(
|
||||
value: &'a v1::Value,
|
||||
datatype_ext: &'a Option<ColumnDataTypeExtension>,
|
||||
) -> ValueRef<'a> {
|
||||
let Some(value) = &value.value_data else {
|
||||
return ValueRef::Null;
|
||||
};
|
||||
@@ -426,9 +577,9 @@ pub fn pb_value_to_value_ref(value: &v1::Value) -> ValueRef {
|
||||
ValueData::TimeMillisecondValue(t) => ValueRef::Time(Time::new_millisecond(*t)),
|
||||
ValueData::TimeMicrosecondValue(t) => ValueRef::Time(Time::new_microsecond(*t)),
|
||||
ValueData::TimeNanosecondValue(t) => ValueRef::Time(Time::new_nanosecond(*t)),
|
||||
ValueData::IntervalYearMonthValues(v) => ValueRef::Interval(Interval::from_i32(*v)),
|
||||
ValueData::IntervalDayTimeValues(v) => ValueRef::Interval(Interval::from_i64(*v)),
|
||||
ValueData::IntervalMonthDayNanoValues(v) => {
|
||||
ValueData::IntervalYearMonthValue(v) => ValueRef::Interval(Interval::from_i32(*v)),
|
||||
ValueData::IntervalDayTimeValue(v) => ValueRef::Interval(Interval::from_i64(*v)),
|
||||
ValueData::IntervalMonthDayNanoValue(v) => {
|
||||
let interval = Interval::from_month_day_nano(v.months, v.days, v.nanoseconds);
|
||||
ValueRef::Interval(interval)
|
||||
}
|
||||
@@ -436,6 +587,28 @@ pub fn pb_value_to_value_ref(value: &v1::Value) -> ValueRef {
|
||||
ValueData::DurationMillisecondValue(v) => ValueRef::Duration(Duration::new_millisecond(*v)),
|
||||
ValueData::DurationMicrosecondValue(v) => ValueRef::Duration(Duration::new_microsecond(*v)),
|
||||
ValueData::DurationNanosecondValue(v) => ValueRef::Duration(Duration::new_nanosecond(*v)),
|
||||
ValueData::Decimal128Value(v) => {
|
||||
// get precision and scale from datatype_extension
|
||||
if let Some(TypeExt::DecimalType(d)) = datatype_ext
|
||||
.as_ref()
|
||||
.and_then(|column_ext| column_ext.type_ext.as_ref())
|
||||
{
|
||||
ValueRef::Decimal128(Decimal128::from_value_precision_scale(
|
||||
v.hi,
|
||||
v.lo,
|
||||
d.precision as u8,
|
||||
d.scale as i8,
|
||||
))
|
||||
} else {
|
||||
// If the precision and scale are not set, use the default value.
|
||||
ValueRef::Decimal128(Decimal128::from_value_precision_scale(
|
||||
v.hi,
|
||||
v.lo,
|
||||
DECIMAL128_MAX_PRECISION,
|
||||
DECIMAL128_DEFAULT_SCALE,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -522,6 +695,11 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
|
||||
values.duration_nanosecond_values,
|
||||
)),
|
||||
},
|
||||
ConcreteDataType::Decimal128(d) => Arc::new(Decimal128Vector::from_values(
|
||||
values.decimal128_values.iter().map(|x| {
|
||||
Decimal128::from_value_precision_scale(x.hi, x.lo, d.precision(), d.scale()).into()
|
||||
}),
|
||||
)),
|
||||
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) => {
|
||||
unreachable!()
|
||||
}
|
||||
@@ -692,6 +870,18 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
|
||||
.into_iter()
|
||||
.map(|v| Value::Duration(Duration::new_nanosecond(v)))
|
||||
.collect(),
|
||||
ConcreteDataType::Decimal128(d) => values
|
||||
.decimal128_values
|
||||
.into_iter()
|
||||
.map(|v| {
|
||||
Value::Decimal128(Decimal128::from_value_precision_scale(
|
||||
v.hi,
|
||||
v.lo,
|
||||
d.precision(),
|
||||
d.scale(),
|
||||
))
|
||||
})
|
||||
.collect(),
|
||||
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) => {
|
||||
unreachable!()
|
||||
}
|
||||
@@ -704,12 +894,14 @@ pub fn is_semantic_type_eq(type_value: i32, semantic_type: SemanticType) -> bool
|
||||
}
|
||||
|
||||
/// Returns true if the pb type value is valid.
|
||||
pub fn is_column_type_value_eq(type_value: i32, expect_type: &ConcreteDataType) -> bool {
|
||||
let Ok(column_type) = ColumnDataType::try_from(type_value) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
is_column_type_eq(column_type, expect_type)
|
||||
pub fn is_column_type_value_eq(
|
||||
type_value: i32,
|
||||
type_extension: Option<ColumnDataTypeExtension>,
|
||||
expect_type: &ConcreteDataType,
|
||||
) -> bool {
|
||||
ColumnDataTypeWrapper::try_new(type_value, type_extension)
|
||||
.map(|wrapper| ConcreteDataType::from(wrapper) == *expect_type)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Convert value into proto's value.
|
||||
@@ -791,13 +983,13 @@ pub fn to_proto_value(value: Value) -> Option<v1::Value> {
|
||||
},
|
||||
Value::Interval(v) => match v.unit() {
|
||||
IntervalUnit::YearMonth => v1::Value {
|
||||
value_data: Some(ValueData::IntervalYearMonthValues(v.to_i32())),
|
||||
value_data: Some(ValueData::IntervalYearMonthValue(v.to_i32())),
|
||||
},
|
||||
IntervalUnit::DayTime => v1::Value {
|
||||
value_data: Some(ValueData::IntervalDayTimeValues(v.to_i64())),
|
||||
value_data: Some(ValueData::IntervalDayTimeValue(v.to_i64())),
|
||||
},
|
||||
IntervalUnit::MonthDayNano => v1::Value {
|
||||
value_data: Some(ValueData::IntervalMonthDayNanoValues(
|
||||
value_data: Some(ValueData::IntervalMonthDayNanoValue(
|
||||
convert_i128_to_interval(v.to_i128()),
|
||||
)),
|
||||
},
|
||||
@@ -816,13 +1008,16 @@ pub fn to_proto_value(value: Value) -> Option<v1::Value> {
|
||||
value_data: Some(ValueData::DurationNanosecondValue(v.value())),
|
||||
},
|
||||
},
|
||||
Value::Decimal128(v) => v1::Value {
|
||||
value_data: Some(ValueData::Decimal128Value(convert_to_pb_decimal128(v))),
|
||||
},
|
||||
Value::List(_) => return None,
|
||||
};
|
||||
|
||||
Some(proto_value)
|
||||
}
|
||||
|
||||
/// Returns the [ColumnDataType] of the value.
|
||||
/// Returns the [ColumnDataTypeWrapper] of the value.
|
||||
///
|
||||
/// If value is null, returns `None`.
|
||||
pub fn proto_value_type(value: &v1::Value) -> Option<ColumnDataType> {
|
||||
@@ -850,72 +1045,18 @@ pub fn proto_value_type(value: &v1::Value) -> Option<ColumnDataType> {
|
||||
ValueData::TimeMillisecondValue(_) => ColumnDataType::TimeMillisecond,
|
||||
ValueData::TimeMicrosecondValue(_) => ColumnDataType::TimeMicrosecond,
|
||||
ValueData::TimeNanosecondValue(_) => ColumnDataType::TimeNanosecond,
|
||||
ValueData::IntervalYearMonthValues(_) => ColumnDataType::IntervalYearMonth,
|
||||
ValueData::IntervalDayTimeValues(_) => ColumnDataType::IntervalDayTime,
|
||||
ValueData::IntervalMonthDayNanoValues(_) => ColumnDataType::IntervalMonthDayNano,
|
||||
ValueData::IntervalYearMonthValue(_) => ColumnDataType::IntervalYearMonth,
|
||||
ValueData::IntervalDayTimeValue(_) => ColumnDataType::IntervalDayTime,
|
||||
ValueData::IntervalMonthDayNanoValue(_) => ColumnDataType::IntervalMonthDayNano,
|
||||
ValueData::DurationSecondValue(_) => ColumnDataType::DurationSecond,
|
||||
ValueData::DurationMillisecondValue(_) => ColumnDataType::DurationMillisecond,
|
||||
ValueData::DurationMicrosecondValue(_) => ColumnDataType::DurationMicrosecond,
|
||||
ValueData::DurationNanosecondValue(_) => ColumnDataType::DurationNanosecond,
|
||||
ValueData::Decimal128Value(_) => ColumnDataType::Decimal128,
|
||||
};
|
||||
Some(value_type)
|
||||
}
|
||||
|
||||
/// Convert [ConcreteDataType] to [ColumnDataType].
|
||||
pub fn to_column_data_type(data_type: &ConcreteDataType) -> Option<ColumnDataType> {
|
||||
let column_data_type = match data_type {
|
||||
ConcreteDataType::Boolean(_) => ColumnDataType::Boolean,
|
||||
ConcreteDataType::Int8(_) => ColumnDataType::Int8,
|
||||
ConcreteDataType::Int16(_) => ColumnDataType::Int16,
|
||||
ConcreteDataType::Int32(_) => ColumnDataType::Int32,
|
||||
ConcreteDataType::Int64(_) => ColumnDataType::Int64,
|
||||
ConcreteDataType::UInt8(_) => ColumnDataType::Uint8,
|
||||
ConcreteDataType::UInt16(_) => ColumnDataType::Uint16,
|
||||
ConcreteDataType::UInt32(_) => ColumnDataType::Uint32,
|
||||
ConcreteDataType::UInt64(_) => ColumnDataType::Uint64,
|
||||
ConcreteDataType::Float32(_) => ColumnDataType::Float32,
|
||||
ConcreteDataType::Float64(_) => ColumnDataType::Float64,
|
||||
ConcreteDataType::Binary(_) => ColumnDataType::Binary,
|
||||
ConcreteDataType::String(_) => ColumnDataType::String,
|
||||
ConcreteDataType::Date(_) => ColumnDataType::Date,
|
||||
ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
|
||||
ConcreteDataType::Timestamp(TimestampType::Second(_)) => ColumnDataType::TimestampSecond,
|
||||
ConcreteDataType::Timestamp(TimestampType::Millisecond(_)) => {
|
||||
ColumnDataType::TimestampMillisecond
|
||||
}
|
||||
ConcreteDataType::Timestamp(TimestampType::Microsecond(_)) => {
|
||||
ColumnDataType::TimestampMicrosecond
|
||||
}
|
||||
ConcreteDataType::Timestamp(TimestampType::Nanosecond(_)) => {
|
||||
ColumnDataType::TimestampNanosecond
|
||||
}
|
||||
ConcreteDataType::Time(TimeType::Second(_)) => ColumnDataType::TimeSecond,
|
||||
ConcreteDataType::Time(TimeType::Millisecond(_)) => ColumnDataType::TimeMillisecond,
|
||||
ConcreteDataType::Time(TimeType::Microsecond(_)) => ColumnDataType::TimeMicrosecond,
|
||||
ConcreteDataType::Time(TimeType::Nanosecond(_)) => ColumnDataType::TimeNanosecond,
|
||||
ConcreteDataType::Duration(DurationType::Second(_)) => ColumnDataType::DurationSecond,
|
||||
ConcreteDataType::Duration(DurationType::Millisecond(_)) => {
|
||||
ColumnDataType::DurationMillisecond
|
||||
}
|
||||
ConcreteDataType::Duration(DurationType::Microsecond(_)) => {
|
||||
ColumnDataType::DurationMicrosecond
|
||||
}
|
||||
ConcreteDataType::Duration(DurationType::Nanosecond(_)) => {
|
||||
ColumnDataType::DurationNanosecond
|
||||
}
|
||||
ConcreteDataType::Interval(IntervalType::YearMonth(_)) => ColumnDataType::IntervalYearMonth,
|
||||
ConcreteDataType::Interval(IntervalType::MonthDayNano(_)) => {
|
||||
ColumnDataType::IntervalMonthDayNano
|
||||
}
|
||||
ConcreteDataType::Interval(IntervalType::DayTime(_)) => ColumnDataType::IntervalDayTime,
|
||||
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) => {
|
||||
return None
|
||||
}
|
||||
};
|
||||
|
||||
Some(column_data_type)
|
||||
}
|
||||
|
||||
pub fn vectors_to_rows<'a>(
|
||||
columns: impl Iterator<Item = &'a VectorRef>,
|
||||
row_count: usize,
|
||||
@@ -962,10 +1103,10 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
|
||||
TimeUnit::Nanosecond => ValueData::TimeNanosecondValue(v.value()),
|
||||
}),
|
||||
Value::Interval(v) => Some(match v.unit() {
|
||||
IntervalUnit::YearMonth => ValueData::IntervalYearMonthValues(v.to_i32()),
|
||||
IntervalUnit::DayTime => ValueData::IntervalDayTimeValues(v.to_i64()),
|
||||
IntervalUnit::YearMonth => ValueData::IntervalYearMonthValue(v.to_i32()),
|
||||
IntervalUnit::DayTime => ValueData::IntervalDayTimeValue(v.to_i64()),
|
||||
IntervalUnit::MonthDayNano => {
|
||||
ValueData::IntervalMonthDayNanoValues(convert_i128_to_interval(v.to_i128()))
|
||||
ValueData::IntervalMonthDayNanoValue(convert_i128_to_interval(v.to_i128()))
|
||||
}
|
||||
}),
|
||||
Value::Duration(v) => Some(match v.unit() {
|
||||
@@ -974,20 +1115,12 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
|
||||
TimeUnit::Microsecond => ValueData::DurationMicrosecondValue(v.value()),
|
||||
TimeUnit::Nanosecond => ValueData::DurationNanosecondValue(v.value()),
|
||||
}),
|
||||
Value::Decimal128(v) => Some(ValueData::Decimal128Value(convert_to_pb_decimal128(v))),
|
||||
Value::List(_) => unreachable!(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the column type is equal to expected type.
|
||||
fn is_column_type_eq(column_type: ColumnDataType, expect_type: &ConcreteDataType) -> bool {
|
||||
if let Some(expect) = to_column_data_type(expect_type) {
|
||||
column_type == expect
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
@@ -1081,189 +1214,204 @@ mod tests {
|
||||
let values = values_with_capacity(ColumnDataType::DurationMillisecond, 2);
|
||||
let values = values.duration_millisecond_values;
|
||||
assert_eq!(2, values.capacity());
|
||||
|
||||
let values = values_with_capacity(ColumnDataType::Decimal128, 2);
|
||||
let values = values.decimal128_values;
|
||||
assert_eq!(2, values.capacity());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_concrete_datatype_from_column_datatype() {
|
||||
assert_eq!(
|
||||
ConcreteDataType::boolean_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Boolean).into()
|
||||
ColumnDataTypeWrapper::boolean_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::int8_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Int8).into()
|
||||
ColumnDataTypeWrapper::int8_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::int16_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Int16).into()
|
||||
ColumnDataTypeWrapper::int16_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::int32_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Int32).into()
|
||||
ColumnDataTypeWrapper::int32_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Int64).into()
|
||||
ColumnDataTypeWrapper::int64_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::uint8_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Uint8).into()
|
||||
ColumnDataTypeWrapper::uint8_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::uint16_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Uint16).into()
|
||||
ColumnDataTypeWrapper::uint16_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::uint32_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Uint32).into()
|
||||
ColumnDataTypeWrapper::uint32_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Uint64).into()
|
||||
ColumnDataTypeWrapper::uint64_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::float32_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Float32).into()
|
||||
ColumnDataTypeWrapper::float32_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::float64_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Float64).into()
|
||||
ColumnDataTypeWrapper::float64_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::binary_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Binary).into()
|
||||
ColumnDataTypeWrapper::binary_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::string_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::String).into()
|
||||
ColumnDataTypeWrapper::string_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::date_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Date).into()
|
||||
ColumnDataTypeWrapper::date_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Datetime).into()
|
||||
ColumnDataTypeWrapper::datetime_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::TimestampMillisecond).into()
|
||||
ColumnDataTypeWrapper::timestamp_millisecond_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::time_datatype(TimeUnit::Millisecond),
|
||||
ColumnDataTypeWrapper(ColumnDataType::TimeMillisecond).into()
|
||||
ColumnDataTypeWrapper::time_millisecond_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::interval_datatype(IntervalUnit::DayTime),
|
||||
ColumnDataTypeWrapper(ColumnDataType::IntervalDayTime).into()
|
||||
ColumnDataTypeWrapper::interval_day_time_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::interval_datatype(IntervalUnit::YearMonth),
|
||||
ColumnDataTypeWrapper(ColumnDataType::IntervalYearMonth).into()
|
||||
ColumnDataTypeWrapper::interval_year_month_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
|
||||
ColumnDataTypeWrapper(ColumnDataType::IntervalMonthDayNano).into()
|
||||
ColumnDataTypeWrapper::interval_month_day_nano_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::duration_millisecond_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::DurationMillisecond).into()
|
||||
ColumnDataTypeWrapper::duration_millisecond_datatype().into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::decimal128_datatype(10, 2),
|
||||
ColumnDataTypeWrapper::decimal128_datatype(10, 2).into()
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_column_datatype_from_concrete_datatype() {
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Boolean),
|
||||
ColumnDataTypeWrapper::boolean_datatype(),
|
||||
ConcreteDataType::boolean_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Int8),
|
||||
ColumnDataTypeWrapper::int8_datatype(),
|
||||
ConcreteDataType::int8_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Int16),
|
||||
ColumnDataTypeWrapper::int16_datatype(),
|
||||
ConcreteDataType::int16_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Int32),
|
||||
ColumnDataTypeWrapper::int32_datatype(),
|
||||
ConcreteDataType::int32_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Int64),
|
||||
ColumnDataTypeWrapper::int64_datatype(),
|
||||
ConcreteDataType::int64_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Uint8),
|
||||
ColumnDataTypeWrapper::uint8_datatype(),
|
||||
ConcreteDataType::uint8_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Uint16),
|
||||
ColumnDataTypeWrapper::uint16_datatype(),
|
||||
ConcreteDataType::uint16_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Uint32),
|
||||
ColumnDataTypeWrapper::uint32_datatype(),
|
||||
ConcreteDataType::uint32_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Uint64),
|
||||
ColumnDataTypeWrapper::uint64_datatype(),
|
||||
ConcreteDataType::uint64_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Float32),
|
||||
ColumnDataTypeWrapper::float32_datatype(),
|
||||
ConcreteDataType::float32_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Float64),
|
||||
ColumnDataTypeWrapper::float64_datatype(),
|
||||
ConcreteDataType::float64_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Binary),
|
||||
ColumnDataTypeWrapper::binary_datatype(),
|
||||
ConcreteDataType::binary_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::String),
|
||||
ColumnDataTypeWrapper::string_datatype(),
|
||||
ConcreteDataType::string_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Date),
|
||||
ColumnDataTypeWrapper::date_datatype(),
|
||||
ConcreteDataType::date_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Datetime),
|
||||
ColumnDataTypeWrapper::datetime_datatype(),
|
||||
ConcreteDataType::datetime_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::TimestampMillisecond),
|
||||
ColumnDataTypeWrapper::timestamp_millisecond_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype()
|
||||
.try_into()
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::IntervalYearMonth),
|
||||
ColumnDataTypeWrapper::interval_year_month_datatype(),
|
||||
ConcreteDataType::interval_datatype(IntervalUnit::YearMonth)
|
||||
.try_into()
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::IntervalDayTime),
|
||||
ColumnDataTypeWrapper::interval_day_time_datatype(),
|
||||
ConcreteDataType::interval_datatype(IntervalUnit::DayTime)
|
||||
.try_into()
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::IntervalMonthDayNano),
|
||||
ColumnDataTypeWrapper::interval_month_day_nano_datatype(),
|
||||
ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano)
|
||||
.try_into()
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::DurationMillisecond),
|
||||
ColumnDataTypeWrapper::duration_millisecond_datatype(),
|
||||
ConcreteDataType::duration_millisecond_datatype()
|
||||
.try_into()
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper::decimal128_datatype(10, 2),
|
||||
ConcreteDataType::decimal128_datatype(10, 2)
|
||||
.try_into()
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
let result: Result<ColumnDataTypeWrapper> = ConcreteDataType::null_datatype().try_into();
|
||||
assert!(result.is_err());
|
||||
assert_eq!(
|
||||
@@ -1290,6 +1438,7 @@ mod tests {
|
||||
}),
|
||||
null_mask: vec![],
|
||||
datatype: 0,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let vector = Arc::new(TimestampNanosecondVector::from_vec(vec![1, 2, 3]));
|
||||
@@ -1331,6 +1480,7 @@ mod tests {
|
||||
}),
|
||||
null_mask: vec![],
|
||||
datatype: 0,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let vector = Arc::new(TimeNanosecondVector::from_vec(vec![1, 2, 3]));
|
||||
@@ -1372,6 +1522,7 @@ mod tests {
|
||||
}),
|
||||
null_mask: vec![],
|
||||
datatype: 0,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let vector = Arc::new(IntervalYearMonthVector::from_vec(vec![1, 2, 3]));
|
||||
@@ -1416,6 +1567,7 @@ mod tests {
|
||||
}),
|
||||
null_mask: vec![],
|
||||
datatype: 0,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let vector = Arc::new(DurationNanosecondVector::from_vec(vec![1, 2, 3]));
|
||||
@@ -1460,6 +1612,7 @@ mod tests {
|
||||
}),
|
||||
null_mask: vec![2],
|
||||
datatype: ColumnDataType::Boolean as i32,
|
||||
..Default::default()
|
||||
};
|
||||
let row_count = 4;
|
||||
|
||||
@@ -1617,17 +1770,17 @@ mod tests {
|
||||
&ConcreteDataType::Interval(IntervalType::MonthDayNano(IntervalMonthDayNanoType)),
|
||||
Values {
|
||||
interval_month_day_nano_values: vec![
|
||||
IntervalMonthDayNano {
|
||||
v1::IntervalMonthDayNano {
|
||||
months: 1,
|
||||
days: 2,
|
||||
nanoseconds: 3,
|
||||
},
|
||||
IntervalMonthDayNano {
|
||||
v1::IntervalMonthDayNano {
|
||||
months: 5,
|
||||
days: 6,
|
||||
nanoseconds: 7,
|
||||
},
|
||||
IntervalMonthDayNano {
|
||||
v1::IntervalMonthDayNano {
|
||||
months: 9,
|
||||
days: 10,
|
||||
nanoseconds: 11,
|
||||
@@ -1859,4 +2012,33 @@ mod tests {
|
||||
assert_eq!(values[6], ValueData::DateValue(30));
|
||||
assert_eq!(values[7], ValueData::StringValue("c".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_column_type_value_eq() {
|
||||
// test column type eq
|
||||
let column1 = Column {
|
||||
column_name: "test".to_string(),
|
||||
semantic_type: 0,
|
||||
values: Some(Values {
|
||||
bool_values: vec![false, true, true],
|
||||
..Default::default()
|
||||
}),
|
||||
null_mask: vec![2],
|
||||
datatype: ColumnDataType::Boolean as i32,
|
||||
datatype_extension: None,
|
||||
};
|
||||
assert!(is_column_type_value_eq(
|
||||
column1.datatype,
|
||||
column1.datatype_extension,
|
||||
&ConcreteDataType::boolean_datatype(),
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_to_pb_decimal128() {
|
||||
let decimal = Decimal128::new(123, 3, 1);
|
||||
let pb_decimal = convert_to_pb_decimal128(decimal);
|
||||
assert_eq!(pb_decimal.lo, 123);
|
||||
assert_eq!(pb_decimal.hi, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,7 +22,10 @@ use crate::helper::ColumnDataTypeWrapper;
|
||||
use crate::v1::ColumnDef;
|
||||
|
||||
pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
|
||||
let data_type = ColumnDataTypeWrapper::try_new(column_def.data_type)?;
|
||||
let data_type = ColumnDataTypeWrapper::try_new(
|
||||
column_def.data_type,
|
||||
column_def.datatype_extension.clone(),
|
||||
)?;
|
||||
|
||||
let constraint = if column_def.default_constraint.is_empty() {
|
||||
None
|
||||
|
||||
@@ -10,6 +10,7 @@ testing = []
|
||||
[dependencies]
|
||||
api.workspace = true
|
||||
arc-swap = "1.0"
|
||||
arrow.workspace = true
|
||||
arrow-schema.workspace = true
|
||||
async-stream.workspace = true
|
||||
async-trait = "0.1"
|
||||
@@ -23,20 +24,23 @@ common-recordbatch.workspace = true
|
||||
common-runtime.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-time.workspace = true
|
||||
dashmap = "5.4"
|
||||
common-version.workspace = true
|
||||
dashmap.workspace = true
|
||||
datafusion.workspace = true
|
||||
datatypes.workspace = true
|
||||
futures = "0.3"
|
||||
futures-util.workspace = true
|
||||
itertools.workspace = true
|
||||
lazy_static.workspace = true
|
||||
meta-client.workspace = true
|
||||
moka = { workspace = true, features = ["future"] }
|
||||
moka = { workspace = true, features = ["future", "sync"] }
|
||||
parking_lot = "0.12"
|
||||
partition.workspace = true
|
||||
paste = "1.0"
|
||||
prometheus.workspace = true
|
||||
regex.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json = "1.0"
|
||||
serde_json.workspace = true
|
||||
session.workspace = true
|
||||
snafu.workspace = true
|
||||
store-api.workspace = true
|
||||
@@ -49,5 +53,4 @@ chrono.workspace = true
|
||||
common-test-util.workspace = true
|
||||
log-store.workspace = true
|
||||
object-store.workspace = true
|
||||
storage.workspace = true
|
||||
tokio.workspace = true
|
||||
|
||||
@@ -41,6 +41,14 @@ pub enum Error {
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to list {}.{}'s tables", catalog, schema))]
|
||||
ListTables {
|
||||
location: Location,
|
||||
catalog: String,
|
||||
schema: String,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to re-compile script due to internal error"))]
|
||||
CompileScriptInternal {
|
||||
location: Location,
|
||||
@@ -156,6 +164,15 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to find table partitions: #{table}"))]
|
||||
FindPartitions {
|
||||
source: partition::error::Error,
|
||||
table: String,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to find region routes"))]
|
||||
FindRegionRoutes { source: partition::error::Error },
|
||||
|
||||
#[snafu(display("Failed to read system catalog table records"))]
|
||||
ReadSystemCatalog {
|
||||
location: Location,
|
||||
@@ -180,7 +197,7 @@ pub enum Error {
|
||||
source: table::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(""))]
|
||||
#[snafu(display("Internal error"))]
|
||||
Internal {
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
@@ -216,7 +233,7 @@ pub enum Error {
|
||||
#[snafu(display("Illegal access to catalog: {} and schema: {}", catalog, schema))]
|
||||
QueryAccessDenied { catalog: String, schema: String },
|
||||
|
||||
#[snafu(display(""))]
|
||||
#[snafu(display("DataFusion error"))]
|
||||
Datafusion {
|
||||
#[snafu(source)]
|
||||
error: DataFusionError,
|
||||
@@ -246,11 +263,14 @@ impl ErrorExt for Error {
|
||||
match self {
|
||||
Error::InvalidKey { .. }
|
||||
| Error::SchemaNotFound { .. }
|
||||
| Error::TableNotFound { .. }
|
||||
| Error::CatalogNotFound { .. }
|
||||
| Error::FindPartitions { .. }
|
||||
| Error::FindRegionRoutes { .. }
|
||||
| Error::InvalidEntryType { .. }
|
||||
| Error::ParallelOpenTable { .. } => StatusCode::Unexpected,
|
||||
|
||||
Error::TableNotFound { .. } => StatusCode::TableNotFound,
|
||||
|
||||
Error::SystemCatalog { .. }
|
||||
| Error::EmptyValue { .. }
|
||||
| Error::ValueDeserialize { .. } => StatusCode::StorageUnavailable,
|
||||
@@ -270,9 +290,9 @@ impl ErrorExt for Error {
|
||||
StatusCode::InvalidArguments
|
||||
}
|
||||
|
||||
Error::ListCatalogs { source, .. } | Error::ListSchemas { source, .. } => {
|
||||
source.status_code()
|
||||
}
|
||||
Error::ListCatalogs { source, .. }
|
||||
| Error::ListSchemas { source, .. }
|
||||
| Error::ListTables { source, .. } => source.status_code(),
|
||||
|
||||
Error::OpenSystemCatalog { source, .. }
|
||||
| Error::CreateSystemCatalog { source, .. }
|
||||
@@ -333,7 +353,7 @@ mod tests {
|
||||
assert_eq!(
|
||||
StatusCode::StorageUnavailable,
|
||||
Error::SystemCatalog {
|
||||
msg: "".to_string(),
|
||||
msg: String::default(),
|
||||
location: Location::generate(),
|
||||
}
|
||||
.status_code()
|
||||
|
||||
@@ -13,16 +13,27 @@
|
||||
// limitations under the License.
|
||||
|
||||
mod columns;
|
||||
mod key_column_usage;
|
||||
mod memory_table;
|
||||
mod partitions;
|
||||
mod predicate;
|
||||
mod region_peers;
|
||||
mod runtime_metrics;
|
||||
mod schemata;
|
||||
mod table_names;
|
||||
mod tables;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_NAME;
|
||||
use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, INFORMATION_SCHEMA_NAME};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_recordbatch::{RecordBatchStreamAdaptor, SendableRecordBatchStream};
|
||||
use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
|
||||
use datatypes::schema::SchemaRef;
|
||||
use futures_util::StreamExt;
|
||||
use lazy_static::lazy_static;
|
||||
use paste::paste;
|
||||
pub(crate) use predicate::Predicates;
|
||||
use snafu::ResultExt;
|
||||
use store_api::data_source::DataSource;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
@@ -32,46 +43,150 @@ use table::metadata::{
|
||||
};
|
||||
use table::thin_table::{ThinTable, ThinTableAdapter};
|
||||
use table::TableRef;
|
||||
pub use table_names::*;
|
||||
|
||||
use self::columns::InformationSchemaColumns;
|
||||
use crate::error::Result;
|
||||
use crate::information_schema::key_column_usage::InformationSchemaKeyColumnUsage;
|
||||
use crate::information_schema::memory_table::{get_schema_columns, MemoryTable};
|
||||
use crate::information_schema::partitions::InformationSchemaPartitions;
|
||||
use crate::information_schema::region_peers::InformationSchemaRegionPeers;
|
||||
use crate::information_schema::runtime_metrics::InformationSchemaMetrics;
|
||||
use crate::information_schema::schemata::InformationSchemaSchemata;
|
||||
use crate::information_schema::tables::InformationSchemaTables;
|
||||
use crate::CatalogManager;
|
||||
|
||||
pub const TABLES: &str = "tables";
|
||||
pub const COLUMNS: &str = "columns";
|
||||
lazy_static! {
|
||||
// Memory tables in `information_schema`.
|
||||
static ref MEMORY_TABLES: &'static [&'static str] = &[
|
||||
ENGINES,
|
||||
COLUMN_PRIVILEGES,
|
||||
COLUMN_STATISTICS,
|
||||
CHARACTER_SETS,
|
||||
COLLATIONS,
|
||||
COLLATION_CHARACTER_SET_APPLICABILITY,
|
||||
CHECK_CONSTRAINTS,
|
||||
EVENTS,
|
||||
FILES,
|
||||
OPTIMIZER_TRACE,
|
||||
PARAMETERS,
|
||||
PROFILING,
|
||||
REFERENTIAL_CONSTRAINTS,
|
||||
ROUTINES,
|
||||
SCHEMA_PRIVILEGES,
|
||||
TABLE_PRIVILEGES,
|
||||
TRIGGERS,
|
||||
GLOBAL_STATUS,
|
||||
SESSION_STATUS,
|
||||
PARTITIONS,
|
||||
];
|
||||
}
|
||||
|
||||
macro_rules! setup_memory_table {
|
||||
($name: expr) => {
|
||||
paste! {
|
||||
{
|
||||
let (schema, columns) = get_schema_columns($name);
|
||||
Some(Arc::new(MemoryTable::new(
|
||||
consts::[<INFORMATION_SCHEMA_ $name _TABLE_ID>],
|
||||
$name,
|
||||
schema,
|
||||
columns
|
||||
)) as _)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// The `information_schema` tables info provider.
|
||||
pub struct InformationSchemaProvider {
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
tables: HashMap<String, TableRef>,
|
||||
}
|
||||
|
||||
impl InformationSchemaProvider {
|
||||
pub fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
let mut provider = Self {
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
}
|
||||
tables: HashMap::new(),
|
||||
};
|
||||
|
||||
provider.build_tables();
|
||||
|
||||
provider
|
||||
}
|
||||
|
||||
/// Build a map of [TableRef] in information schema.
|
||||
/// Including `tables` and `columns`.
|
||||
pub fn build(
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
) -> HashMap<String, TableRef> {
|
||||
let provider = Self::new(catalog_name, catalog_manager);
|
||||
/// Returns table names in the order of table id.
|
||||
pub fn table_names(&self) -> Vec<String> {
|
||||
let mut tables = self.tables.values().clone().collect::<Vec<_>>();
|
||||
|
||||
let mut schema = HashMap::new();
|
||||
schema.insert(TABLES.to_owned(), provider.table(TABLES).unwrap());
|
||||
schema.insert(COLUMNS.to_owned(), provider.table(COLUMNS).unwrap());
|
||||
schema
|
||||
tables.sort_by(|t1, t2| {
|
||||
t1.table_info()
|
||||
.table_id()
|
||||
.partial_cmp(&t2.table_info().table_id())
|
||||
.unwrap()
|
||||
});
|
||||
tables
|
||||
.into_iter()
|
||||
.map(|t| t.table_info().name.clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Returns a map of [TableRef] in information schema.
|
||||
pub fn tables(&self) -> &HashMap<String, TableRef> {
|
||||
assert!(!self.tables.is_empty());
|
||||
|
||||
&self.tables
|
||||
}
|
||||
|
||||
/// Returns the [TableRef] by table name.
|
||||
pub fn table(&self, name: &str) -> Option<TableRef> {
|
||||
self.tables.get(name).cloned()
|
||||
}
|
||||
|
||||
fn build_tables(&mut self) {
|
||||
let mut tables = HashMap::new();
|
||||
|
||||
// Carefully consider the tables that may expose sensitive cluster configurations,
|
||||
// authentication details, and other critical information.
|
||||
// Only put these tables under `greptime` catalog to prevent info leak.
|
||||
if self.catalog_name == DEFAULT_CATALOG_NAME {
|
||||
tables.insert(
|
||||
RUNTIME_METRICS.to_string(),
|
||||
self.build_table(RUNTIME_METRICS).unwrap(),
|
||||
);
|
||||
tables.insert(
|
||||
BUILD_INFO.to_string(),
|
||||
self.build_table(BUILD_INFO).unwrap(),
|
||||
);
|
||||
tables.insert(
|
||||
REGION_PEERS.to_string(),
|
||||
self.build_table(REGION_PEERS).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap());
|
||||
tables.insert(SCHEMATA.to_string(), self.build_table(SCHEMATA).unwrap());
|
||||
tables.insert(COLUMNS.to_string(), self.build_table(COLUMNS).unwrap());
|
||||
tables.insert(
|
||||
KEY_COLUMN_USAGE.to_string(),
|
||||
self.build_table(KEY_COLUMN_USAGE).unwrap(),
|
||||
);
|
||||
|
||||
// Add memory tables
|
||||
for name in MEMORY_TABLES.iter() {
|
||||
tables.insert((*name).to_string(), self.build_table(name).expect(name));
|
||||
}
|
||||
|
||||
self.tables = tables;
|
||||
}
|
||||
|
||||
fn build_table(&self, name: &str) -> Option<TableRef> {
|
||||
self.information_table(name).map(|table| {
|
||||
let table_info = Self::table_info(self.catalog_name.clone(), &table);
|
||||
let filter_pushdown = FilterPushDownType::Unsupported;
|
||||
let filter_pushdown = FilterPushDownType::Inexact;
|
||||
let thin_table = ThinTable::new(table_info, filter_pushdown);
|
||||
|
||||
let data_source = Arc::new(InformationTableDataSource::new(table));
|
||||
@@ -89,6 +204,45 @@ impl InformationSchemaProvider {
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)) as _),
|
||||
ENGINES => setup_memory_table!(ENGINES),
|
||||
COLUMN_PRIVILEGES => setup_memory_table!(COLUMN_PRIVILEGES),
|
||||
COLUMN_STATISTICS => setup_memory_table!(COLUMN_STATISTICS),
|
||||
BUILD_INFO => setup_memory_table!(BUILD_INFO),
|
||||
CHARACTER_SETS => setup_memory_table!(CHARACTER_SETS),
|
||||
COLLATIONS => setup_memory_table!(COLLATIONS),
|
||||
COLLATION_CHARACTER_SET_APPLICABILITY => {
|
||||
setup_memory_table!(COLLATION_CHARACTER_SET_APPLICABILITY)
|
||||
}
|
||||
CHECK_CONSTRAINTS => setup_memory_table!(CHECK_CONSTRAINTS),
|
||||
EVENTS => setup_memory_table!(EVENTS),
|
||||
FILES => setup_memory_table!(FILES),
|
||||
OPTIMIZER_TRACE => setup_memory_table!(OPTIMIZER_TRACE),
|
||||
PARAMETERS => setup_memory_table!(PARAMETERS),
|
||||
PROFILING => setup_memory_table!(PROFILING),
|
||||
REFERENTIAL_CONSTRAINTS => setup_memory_table!(REFERENTIAL_CONSTRAINTS),
|
||||
ROUTINES => setup_memory_table!(ROUTINES),
|
||||
SCHEMA_PRIVILEGES => setup_memory_table!(SCHEMA_PRIVILEGES),
|
||||
TABLE_PRIVILEGES => setup_memory_table!(TABLE_PRIVILEGES),
|
||||
TRIGGERS => setup_memory_table!(TRIGGERS),
|
||||
GLOBAL_STATUS => setup_memory_table!(GLOBAL_STATUS),
|
||||
SESSION_STATUS => setup_memory_table!(SESSION_STATUS),
|
||||
KEY_COLUMN_USAGE => Some(Arc::new(InformationSchemaKeyColumnUsage::new(
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)) as _),
|
||||
SCHEMATA => Some(Arc::new(InformationSchemaSchemata::new(
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)) as _),
|
||||
RUNTIME_METRICS => Some(Arc::new(InformationSchemaMetrics::new())),
|
||||
PARTITIONS => Some(Arc::new(InformationSchemaPartitions::new(
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)) as _),
|
||||
REGION_PEERS => Some(Arc::new(InformationSchemaRegionPeers::new(
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)) as _),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -102,9 +256,9 @@ impl InformationSchemaProvider {
|
||||
.unwrap();
|
||||
let table_info = TableInfoBuilder::default()
|
||||
.table_id(table.table_id())
|
||||
.name(table.table_name().to_owned())
|
||||
.name(table.table_name().to_string())
|
||||
.catalog_name(catalog_name)
|
||||
.schema_name(INFORMATION_SCHEMA_NAME.to_owned())
|
||||
.schema_name(INFORMATION_SCHEMA_NAME.to_string())
|
||||
.meta(table_meta)
|
||||
.table_type(table.table_type())
|
||||
.build()
|
||||
@@ -120,7 +274,7 @@ trait InformationTable {
|
||||
|
||||
fn schema(&self) -> SchemaRef;
|
||||
|
||||
fn to_stream(&self) -> Result<SendableRecordBatchStream>;
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream>;
|
||||
|
||||
fn table_type(&self) -> TableType {
|
||||
TableType::Temporary
|
||||
@@ -154,7 +308,7 @@ impl DataSource for InformationTableDataSource {
|
||||
&self,
|
||||
request: ScanRequest,
|
||||
) -> std::result::Result<SendableRecordBatchStream, BoxedError> {
|
||||
let projection = request.projection;
|
||||
let projection = request.projection.clone();
|
||||
let projected_schema = match &projection {
|
||||
Some(projection) => self.try_project(projection)?,
|
||||
None => self.table.schema(),
|
||||
@@ -162,7 +316,7 @@ impl DataSource for InformationTableDataSource {
|
||||
|
||||
let stream = self
|
||||
.table
|
||||
.to_stream()
|
||||
.to_stream(request)
|
||||
.map_err(BoxedError::new)
|
||||
.context(TablesRecordBatchSnafu)
|
||||
.map_err(BoxedError::new)?
|
||||
@@ -171,11 +325,13 @@ impl DataSource for InformationTableDataSource {
|
||||
None => batch,
|
||||
});
|
||||
|
||||
let stream = RecordBatchStreamAdaptor {
|
||||
let stream = RecordBatchStreamWrapper {
|
||||
schema: projected_schema,
|
||||
stream: Box::pin(stream),
|
||||
output_ordering: None,
|
||||
metrics: Default::default(),
|
||||
};
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,8 +16,8 @@ use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::{
|
||||
INFORMATION_SCHEMA_COLUMNS_TABLE_ID, INFORMATION_SCHEMA_NAME, SEMANTIC_TYPE_FIELD,
|
||||
SEMANTIC_TYPE_PRIMARY_KEY, SEMANTIC_TYPE_TIME_INDEX,
|
||||
INFORMATION_SCHEMA_COLUMNS_TABLE_ID, SEMANTIC_TYPE_FIELD, SEMANTIC_TYPE_PRIMARY_KEY,
|
||||
SEMANTIC_TYPE_TIME_INDEX,
|
||||
};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
@@ -29,15 +29,17 @@ use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatc
|
||||
use datatypes::prelude::{ConcreteDataType, DataType};
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{StringVectorBuilder, VectorRef};
|
||||
use futures::TryStreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::TableId;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::tables::InformationSchemaTables;
|
||||
use super::{InformationTable, COLUMNS, TABLES};
|
||||
use super::{InformationTable, COLUMNS};
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::Predicates;
|
||||
use crate::CatalogManager;
|
||||
|
||||
pub(super) struct InformationSchemaColumns {
|
||||
@@ -52,6 +54,11 @@ const TABLE_NAME: &str = "table_name";
|
||||
const COLUMN_NAME: &str = "column_name";
|
||||
const DATA_TYPE: &str = "data_type";
|
||||
const SEMANTIC_TYPE: &str = "semantic_type";
|
||||
const COLUMN_DEFAULT: &str = "column_default";
|
||||
const IS_NULLABLE: &str = "is_nullable";
|
||||
const COLUMN_TYPE: &str = "column_type";
|
||||
const COLUMN_COMMENT: &str = "column_comment";
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
impl InformationSchemaColumns {
|
||||
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
@@ -70,6 +77,10 @@ impl InformationSchemaColumns {
|
||||
ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(DATA_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(SEMANTIC_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(COLUMN_DEFAULT, ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(IS_NULLABLE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(COLUMN_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(COLUMN_COMMENT, ConcreteDataType::string_datatype(), true),
|
||||
]))
|
||||
}
|
||||
|
||||
@@ -95,14 +106,14 @@ impl InformationTable for InformationSchemaColumns {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self) -> Result<SendableRecordBatchStream> {
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_tables()
|
||||
.make_columns(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
@@ -127,6 +138,11 @@ struct InformationSchemaColumnsBuilder {
|
||||
column_names: StringVectorBuilder,
|
||||
data_types: StringVectorBuilder,
|
||||
semantic_types: StringVectorBuilder,
|
||||
|
||||
column_defaults: StringVectorBuilder,
|
||||
is_nullables: StringVectorBuilder,
|
||||
column_types: StringVectorBuilder,
|
||||
column_comments: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaColumnsBuilder {
|
||||
@@ -139,55 +155,34 @@ impl InformationSchemaColumnsBuilder {
|
||||
schema,
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
catalog_names: StringVectorBuilder::with_capacity(42),
|
||||
schema_names: StringVectorBuilder::with_capacity(42),
|
||||
table_names: StringVectorBuilder::with_capacity(42),
|
||||
column_names: StringVectorBuilder::with_capacity(42),
|
||||
data_types: StringVectorBuilder::with_capacity(42),
|
||||
semantic_types: StringVectorBuilder::with_capacity(42),
|
||||
catalog_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
column_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
data_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
semantic_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
column_defaults: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
is_nullables: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
column_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
column_comments: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.tables` virtual table
|
||||
async fn make_tables(&mut self) -> Result<RecordBatch> {
|
||||
/// Construct the `information_schema.columns` virtual table
|
||||
async fn make_columns(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
|
||||
let catalog_name = self.catalog_name.clone();
|
||||
let catalog_manager = self
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
if !catalog_manager
|
||||
.schema_exists(&catalog_name, &schema_name)
|
||||
.await?
|
||||
{
|
||||
continue;
|
||||
}
|
||||
for table_name in catalog_manager
|
||||
.table_names(&catalog_name, &schema_name)
|
||||
.await?
|
||||
{
|
||||
let (keys, schema) = if let Some(table) = catalog_manager
|
||||
.table(&catalog_name, &schema_name, &table_name)
|
||||
.await?
|
||||
{
|
||||
let keys = &table.table_info().meta.primary_key_indices;
|
||||
let schema = table.schema();
|
||||
(keys.clone(), schema)
|
||||
} else {
|
||||
// TODO: this specific branch is only a workaround for FrontendCatalogManager.
|
||||
if schema_name == INFORMATION_SCHEMA_NAME {
|
||||
if table_name == COLUMNS {
|
||||
(vec![], InformationSchemaColumns::schema())
|
||||
} else if table_name == TABLES {
|
||||
(vec![], InformationSchemaTables::schema())
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name).await;
|
||||
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let keys = &table.table_info().meta.primary_key_indices;
|
||||
let schema = table.schema();
|
||||
|
||||
for (idx, column) in schema.column_schemas().iter().enumerate() {
|
||||
let semantic_type = if column.is_time_index() {
|
||||
@@ -197,13 +192,14 @@ impl InformationSchemaColumnsBuilder {
|
||||
} else {
|
||||
SEMANTIC_TYPE_FIELD
|
||||
};
|
||||
|
||||
self.add_column(
|
||||
&predicates,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
&column.name,
|
||||
column.data_type.name(),
|
||||
&table.table_info().name,
|
||||
semantic_type,
|
||||
column,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -214,19 +210,48 @@ impl InformationSchemaColumnsBuilder {
|
||||
|
||||
fn add_column(
|
||||
&mut self,
|
||||
predicates: &Predicates,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
data_type: &str,
|
||||
semantic_type: &str,
|
||||
column_schema: &ColumnSchema,
|
||||
) {
|
||||
let data_type = &column_schema.data_type.name();
|
||||
|
||||
let row = [
|
||||
(TABLE_CATALOG, &Value::from(catalog_name)),
|
||||
(TABLE_SCHEMA, &Value::from(schema_name)),
|
||||
(TABLE_NAME, &Value::from(table_name)),
|
||||
(COLUMN_NAME, &Value::from(column_schema.name.as_str())),
|
||||
(DATA_TYPE, &Value::from(data_type.as_str())),
|
||||
(SEMANTIC_TYPE, &Value::from(semantic_type)),
|
||||
];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.catalog_names.push(Some(catalog_name));
|
||||
self.schema_names.push(Some(schema_name));
|
||||
self.table_names.push(Some(table_name));
|
||||
self.column_names.push(Some(column_name));
|
||||
self.column_names.push(Some(&column_schema.name));
|
||||
self.data_types.push(Some(data_type));
|
||||
self.semantic_types.push(Some(semantic_type));
|
||||
self.column_defaults.push(
|
||||
column_schema
|
||||
.default_constraint()
|
||||
.map(|s| format!("{}", s))
|
||||
.as_deref(),
|
||||
);
|
||||
if column_schema.is_nullable() {
|
||||
self.is_nullables.push(Some("Yes"));
|
||||
} else {
|
||||
self.is_nullables.push(Some("No"));
|
||||
}
|
||||
self.column_types.push(Some(data_type));
|
||||
self.column_comments
|
||||
.push(column_schema.column_comment().map(|x| x.as_ref()));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
@@ -237,7 +262,12 @@ impl InformationSchemaColumnsBuilder {
|
||||
Arc::new(self.column_names.finish()),
|
||||
Arc::new(self.data_types.finish()),
|
||||
Arc::new(self.semantic_types.finish()),
|
||||
Arc::new(self.column_defaults.finish()),
|
||||
Arc::new(self.is_nullables.finish()),
|
||||
Arc::new(self.column_types.finish()),
|
||||
Arc::new(self.column_comments.finish()),
|
||||
];
|
||||
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
@@ -254,7 +284,7 @@ impl DfPartitionStream for InformationSchemaColumns {
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_tables()
|
||||
.make_columns(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
|
||||
345
src/catalog/src/information_schema/key_column_usage.rs
Normal file
@@ -0,0 +1,345 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, MutableVector, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, UInt32VectorBuilder};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::KEY_COLUMN_USAGE;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::{InformationTable, Predicates};
|
||||
use crate::CatalogManager;
|
||||
|
||||
const CONSTRAINT_SCHEMA: &str = "constraint_schema";
|
||||
const CONSTRAINT_NAME: &str = "constraint_name";
|
||||
const TABLE_CATALOG: &str = "table_catalog";
|
||||
const TABLE_SCHEMA: &str = "table_schema";
|
||||
const TABLE_NAME: &str = "table_name";
|
||||
const COLUMN_NAME: &str = "column_name";
|
||||
const ORDINAL_POSITION: &str = "ordinal_position";
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
|
||||
pub(super) struct InformationSchemaKeyColumnUsage {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
}
|
||||
|
||||
impl InformationSchemaKeyColumnUsage {
|
||||
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(
|
||||
"constraint_catalog",
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
CONSTRAINT_SCHEMA,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(CONSTRAINT_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(ORDINAL_POSITION, ConcreteDataType::uint32_datatype(), false),
|
||||
ColumnSchema::new(
|
||||
"position_in_unique_constraint",
|
||||
ConcreteDataType::uint32_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"referenced_table_schema",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"referenced_table_name",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"referenced_column_name",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
]))
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaKeyColumnUsageBuilder {
|
||||
InformationSchemaKeyColumnUsageBuilder::new(
|
||||
self.schema.clone(),
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for InformationSchemaKeyColumnUsage {
|
||||
fn table_id(&self) -> TableId {
|
||||
INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
KEY_COLUMN_USAGE
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_key_column_usage(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the `information_schema.KEY_COLUMN_USAGE` table row by row
|
||||
///
|
||||
/// Columns are based on <https://dev.mysql.com/doc/refman/8.2/en/information-schema-key-column-usage-table.html>
|
||||
struct InformationSchemaKeyColumnUsageBuilder {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
|
||||
constraint_catalog: StringVectorBuilder,
|
||||
constraint_schema: StringVectorBuilder,
|
||||
constraint_name: StringVectorBuilder,
|
||||
table_catalog: StringVectorBuilder,
|
||||
table_schema: StringVectorBuilder,
|
||||
table_name: StringVectorBuilder,
|
||||
column_name: StringVectorBuilder,
|
||||
ordinal_position: UInt32VectorBuilder,
|
||||
position_in_unique_constraint: UInt32VectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaKeyColumnUsageBuilder {
|
||||
fn new(
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
constraint_catalog: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
constraint_schema: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
constraint_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_catalog: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_schema: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
column_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
ordinal_position: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
position_in_unique_constraint: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.KEY_COLUMN_USAGE` virtual table
|
||||
async fn make_key_column_usage(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
|
||||
let catalog_name = self.catalog_name.clone();
|
||||
let catalog_manager = self
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
let mut primary_constraints = vec![];
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
if !catalog_manager
|
||||
.schema_exists(&catalog_name, &schema_name)
|
||||
.await?
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for table_name in catalog_manager
|
||||
.table_names(&catalog_name, &schema_name)
|
||||
.await?
|
||||
{
|
||||
if let Some(table) = catalog_manager
|
||||
.table(&catalog_name, &schema_name, &table_name)
|
||||
.await?
|
||||
{
|
||||
let keys = &table.table_info().meta.primary_key_indices;
|
||||
let schema = table.schema();
|
||||
|
||||
for (idx, column) in schema.column_schemas().iter().enumerate() {
|
||||
if column.is_time_index() {
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
"TIME INDEX",
|
||||
&schema_name,
|
||||
&table_name,
|
||||
&column.name,
|
||||
1, // always 1 for time index
|
||||
);
|
||||
}
|
||||
if keys.contains(&idx) {
|
||||
primary_constraints.push((
|
||||
schema_name.clone(),
|
||||
table_name.clone(),
|
||||
column.name.clone(),
|
||||
));
|
||||
}
|
||||
// TODO(dimbtp): foreign key constraint not supported yet
|
||||
}
|
||||
} else {
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i, (schema_name, table_name, column_name)) in
|
||||
primary_constraints.into_iter().enumerate()
|
||||
{
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
"PRIMARY",
|
||||
&schema_name,
|
||||
&table_name,
|
||||
&column_name,
|
||||
i as u32 + 1,
|
||||
);
|
||||
}
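// For a catalog containing a single table whose time index is `ts` and whose
// primary key columns are (`host`, `idc`), the rows built above look roughly
// like this (a sketch, not taken from a real catalog):
//   ("TIME INDEX", ts,   ordinal_position = 1)
//   ("PRIMARY",    host, ordinal_position = 1)
//   ("PRIMARY",    idc,  ordinal_position = 2)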
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
// TODO(dimbtp): Foreign key constraints would give non-`None` values for the
// last 4 fields, but they are not supported yet.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn add_key_column_usage(
|
||||
&mut self,
|
||||
predicates: &Predicates,
|
||||
constraint_schema: &str,
|
||||
constraint_name: &str,
|
||||
table_schema: &str,
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
ordinal_position: u32,
|
||||
) {
|
||||
let row = [
|
||||
(CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
|
||||
(CONSTRAINT_NAME, &Value::from(constraint_name)),
|
||||
(TABLE_SCHEMA, &Value::from(table_schema)),
|
||||
(TABLE_NAME, &Value::from(table_name)),
|
||||
(COLUMN_NAME, &Value::from(column_name)),
|
||||
(ORDINAL_POSITION, &Value::from(ordinal_position)),
|
||||
];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.constraint_catalog.push(Some("def"));
|
||||
self.constraint_schema.push(Some(constraint_schema));
|
||||
self.constraint_name.push(Some(constraint_name));
|
||||
self.table_catalog.push(Some("def"));
|
||||
self.table_schema.push(Some(table_schema));
|
||||
self.table_name.push(Some(table_name));
|
||||
self.column_name.push(Some(column_name));
|
||||
self.ordinal_position.push(Some(ordinal_position));
|
||||
self.position_in_unique_constraint.push(None);
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let rows_num = self.table_catalog.len();
|
||||
|
||||
let null_string_vector = Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from(vec![None as Option<&str>])),
|
||||
rows_num,
|
||||
));
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
Arc::new(self.constraint_catalog.finish()),
|
||||
Arc::new(self.constraint_schema.finish()),
|
||||
Arc::new(self.constraint_name.finish()),
|
||||
Arc::new(self.table_catalog.finish()),
|
||||
Arc::new(self.table_schema.finish()),
|
||||
Arc::new(self.table_name.finish()),
|
||||
Arc::new(self.column_name.finish()),
|
||||
Arc::new(self.ordinal_position.finish()),
|
||||
Arc::new(self.position_in_unique_constraint.finish()),
|
||||
null_string_vector.clone(),
|
||||
null_string_vector.clone(),
|
||||
null_string_vector,
|
||||
];
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl DfPartitionStream for InformationSchemaKeyColumnUsage {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_key_column_usage(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
214
src/catalog/src/information_schema/memory_table.rs
Normal file
@@ -0,0 +1,214 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod tables;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
pub use tables::get_schema_columns;
|
||||
|
||||
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
|
||||
use crate::information_schema::InformationTable;
|
||||
|
||||
/// A memory table with specified schema and columns.
|
||||
pub(super) struct MemoryTable {
|
||||
table_id: TableId,
|
||||
table_name: &'static str,
|
||||
schema: SchemaRef,
|
||||
columns: Vec<VectorRef>,
|
||||
}
|
||||
|
||||
impl MemoryTable {
|
||||
/// Creates a memory table with table id, name, schema and columns.
|
||||
pub(super) fn new(
|
||||
table_id: TableId,
|
||||
table_name: &'static str,
|
||||
schema: SchemaRef,
|
||||
columns: Vec<VectorRef>,
|
||||
) -> Self {
|
||||
Self {
|
||||
table_id,
|
||||
table_name,
|
||||
schema,
|
||||
columns,
|
||||
}
|
||||
}
|
||||
|
||||
fn builder(&self) -> MemoryTableBuilder {
|
||||
MemoryTableBuilder::new(self.schema.clone(), self.columns.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for MemoryTable {
|
||||
fn table_id(&self) -> TableId {
|
||||
self.table_id
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
self.table_name
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, _request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.memory_records()
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
struct MemoryTableBuilder {
|
||||
schema: SchemaRef,
|
||||
columns: Vec<VectorRef>,
|
||||
}
|
||||
|
||||
impl MemoryTableBuilder {
|
||||
fn new(schema: SchemaRef, columns: Vec<VectorRef>) -> Self {
|
||||
Self { schema, columns }
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.{table_name}` virtual table
|
||||
async fn memory_records(&mut self) -> Result<RecordBatch> {
|
||||
if self.columns.is_empty() {
|
||||
RecordBatch::new_empty(self.schema.clone()).context(CreateRecordBatchSnafu)
|
||||
} else {
|
||||
RecordBatch::new(self.schema.clone(), std::mem::take(&mut self.columns))
|
||||
.context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
}
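// Note: `memory_records` uses `std::mem::take`, which leaves `columns` empty, so a
// builder yields its record batch only once; every scan constructs a fresh builder
// via `MemoryTable::builder`.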
|
||||
|
||||
impl DfPartitionStream for MemoryTable {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.memory_records()
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_recordbatch::RecordBatches;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::vectors::StringVector;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_memory_table() {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new("a", ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new("b", ConcreteDataType::string_datatype(), false),
|
||||
]));
|
||||
|
||||
let table = MemoryTable::new(
|
||||
42,
|
||||
"test",
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(StringVector::from(vec!["a1", "a2"])),
|
||||
Arc::new(StringVector::from(vec!["b1", "b2"])),
|
||||
],
|
||||
);
|
||||
|
||||
assert_eq!(42, table.table_id());
|
||||
assert_eq!("test", table.table_name());
|
||||
assert_eq!(schema, InformationTable::schema(&table));
|
||||
|
||||
let stream = table.to_stream(ScanRequest::default()).unwrap();
|
||||
|
||||
let batches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
"\
|
||||
+----+----+
|
||||
| a | b |
|
||||
+----+----+
|
||||
| a1 | b1 |
|
||||
| a2 | b2 |
|
||||
+----+----+",
|
||||
batches.pretty_print().unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_empty_memory_table() {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new("a", ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new("b", ConcreteDataType::string_datatype(), false),
|
||||
]));
|
||||
|
||||
let table = MemoryTable::new(42, "test", schema.clone(), vec![]);
|
||||
|
||||
assert_eq!(42, table.table_id());
|
||||
assert_eq!("test", table.table_name());
|
||||
assert_eq!(schema, InformationTable::schema(&table));
|
||||
|
||||
let stream = table.to_stream(ScanRequest::default()).unwrap();
|
||||
|
||||
let batches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
"\
|
||||
+---+---+
|
||||
| a | b |
|
||||
+---+---+
|
||||
+---+---+",
|
||||
batches.pretty_print().unwrap()
|
||||
);
|
||||
}
|
||||
}
|
||||
460
src/catalog/src/information_schema/memory_table/tables.rs
Normal file
@@ -0,0 +1,460 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_catalog::consts::MITO_ENGINE;
|
||||
use datatypes::prelude::{ConcreteDataType, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::vectors::{Int64Vector, StringVector};
|
||||
|
||||
use crate::information_schema::table_names::*;
|
||||
|
||||
/// Finds the schema and columns by the table name; only valid for memory tables.
/// Safety: the caller MUST ensure the table schema exists; panics otherwise.
|
||||
pub fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>) {
|
||||
let (column_schemas, columns): (_, Vec<VectorRef>) = match table_name {
|
||||
COLUMN_PRIVILEGES => (
|
||||
string_columns(&[
|
||||
"GRANTEE",
|
||||
"TABLE_CATALOG",
|
||||
"TABLE_SCHEMA",
|
||||
"TABLE_NAME",
|
||||
"COLUMN_NAME",
|
||||
"PRIVILEGE_TYPE",
|
||||
"IS_GRANTABLE",
|
||||
]),
|
||||
vec![],
|
||||
),
|
||||
|
||||
COLUMN_STATISTICS => (
|
||||
string_columns(&[
|
||||
"SCHEMA_NAME",
|
||||
"TABLE_NAME",
|
||||
"COLUMN_NAME",
|
||||
// TODO(dennis): It must be a JSON type, but we don't support it yet
|
||||
"HISTOGRAM",
|
||||
]),
|
||||
vec![],
|
||||
),
|
||||
|
||||
ENGINES => (
|
||||
string_columns(&[
|
||||
"ENGINE",
|
||||
"SUPPORT",
|
||||
"COMMENT",
|
||||
"TRANSACTIONS",
|
||||
"XA",
|
||||
"SAVEPOINTS",
|
||||
]),
|
||||
vec![
|
||||
Arc::new(StringVector::from(vec![MITO_ENGINE])),
|
||||
Arc::new(StringVector::from(vec!["DEFAULT"])),
|
||||
Arc::new(StringVector::from(vec![
|
||||
"Storage engine for time-series data",
|
||||
])),
|
||||
Arc::new(StringVector::from(vec!["NO"])),
|
||||
Arc::new(StringVector::from(vec!["NO"])),
|
||||
Arc::new(StringVector::from(vec!["NO"])),
|
||||
],
|
||||
),
|
||||
|
||||
BUILD_INFO => {
|
||||
let build_info = common_version::build_info();
|
||||
(
|
||||
string_columns(&[
|
||||
"GIT_BRANCH",
|
||||
"GIT_COMMIT",
|
||||
"GIT_COMMIT_SHORT",
|
||||
"GIT_DIRTY",
|
||||
"PKG_VERSION",
|
||||
]),
|
||||
vec![
|
||||
Arc::new(StringVector::from(vec![build_info.branch.to_string()])),
|
||||
Arc::new(StringVector::from(vec![build_info.commit.to_string()])),
|
||||
Arc::new(StringVector::from(vec![build_info
|
||||
.commit_short
|
||||
.to_string()])),
|
||||
Arc::new(StringVector::from(vec![build_info.dirty.to_string()])),
|
||||
Arc::new(StringVector::from(vec![build_info.version.to_string()])),
|
||||
],
|
||||
)
|
||||
}
|
||||
|
||||
CHARACTER_SETS => (
|
||||
vec![
|
||||
string_column("CHARACTER_SET_NAME"),
|
||||
string_column("DEFAULT_COLLATE_NAME"),
|
||||
string_column("DESCRIPTION"),
|
||||
bigint_column("MAXLEN"),
|
||||
],
|
||||
vec![
|
||||
Arc::new(StringVector::from(vec!["utf8"])),
|
||||
Arc::new(StringVector::from(vec!["utf8_bin"])),
|
||||
Arc::new(StringVector::from(vec!["UTF-8 Unicode"])),
|
||||
Arc::new(Int64Vector::from_slice([4])),
|
||||
],
|
||||
),
|
||||
|
||||
COLLATIONS => (
|
||||
vec![
|
||||
string_column("COLLATION_NAME"),
|
||||
string_column("CHARACTER_SET_NAME"),
|
||||
bigint_column("ID"),
|
||||
string_column("IS_DEFAULT"),
|
||||
string_column("IS_COMPILED"),
|
||||
bigint_column("SORTLEN"),
|
||||
],
|
||||
vec![
|
||||
Arc::new(StringVector::from(vec!["utf8_bin"])),
|
||||
Arc::new(StringVector::from(vec!["utf8"])),
|
||||
Arc::new(Int64Vector::from_slice([1])),
|
||||
Arc::new(StringVector::from(vec!["Yes"])),
|
||||
Arc::new(StringVector::from(vec!["Yes"])),
|
||||
Arc::new(Int64Vector::from_slice([1])),
|
||||
],
|
||||
),
|
||||
|
||||
COLLATION_CHARACTER_SET_APPLICABILITY => (
|
||||
vec![
|
||||
string_column("COLLATION_NAME"),
|
||||
string_column("CHARACTER_SET_NAME"),
|
||||
],
|
||||
vec![
|
||||
Arc::new(StringVector::from(vec!["utf8_bin"])),
|
||||
Arc::new(StringVector::from(vec!["utf8"])),
|
||||
],
|
||||
),
|
||||
|
||||
CHECK_CONSTRAINTS => (
|
||||
string_columns(&[
|
||||
"CONSTRAINT_CATALOG",
|
||||
"CONSTRAINT_SCHEMA",
|
||||
"CONSTRAINT_NAME",
|
||||
"CHECK_CLAUSE",
|
||||
]),
|
||||
// Check constraints are not supported yet
|
||||
vec![],
|
||||
),
|
||||
|
||||
EVENTS => (
|
||||
vec![
|
||||
string_column("EVENT_CATALOG"),
|
||||
string_column("EVENT_SCHEMA"),
|
||||
string_column("EVENT_NAME"),
|
||||
string_column("DEFINER"),
|
||||
string_column("TIME_ZONE"),
|
||||
string_column("EVENT_BODY"),
|
||||
string_column("EVENT_DEFINITION"),
|
||||
string_column("EVENT_TYPE"),
|
||||
datetime_column("EXECUTE_AT"),
|
||||
bigint_column("INTERVAL_VALUE"),
|
||||
string_column("INTERVAL_FIELD"),
|
||||
string_column("SQL_MODE"),
|
||||
datetime_column("STARTS"),
|
||||
datetime_column("ENDS"),
|
||||
string_column("STATUS"),
|
||||
string_column("ON_COMPLETION"),
|
||||
datetime_column("CREATED"),
|
||||
datetime_column("LAST_ALTERED"),
|
||||
datetime_column("LAST_EXECUTED"),
|
||||
string_column("EVENT_COMMENT"),
|
||||
bigint_column("ORIGINATOR"),
|
||||
string_column("CHARACTER_SET_CLIENT"),
|
||||
string_column("COLLATION_CONNECTION"),
|
||||
string_column("DATABASE_COLLATION"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
FILES => (
|
||||
vec![
|
||||
bigint_column("FILE_ID"),
|
||||
string_column("FILE_NAME"),
|
||||
string_column("FILE_TYPE"),
|
||||
string_column("TABLESPACE_NAME"),
|
||||
string_column("TABLE_CATALOG"),
|
||||
string_column("TABLE_SCHEMA"),
|
||||
string_column("TABLE_NAME"),
|
||||
string_column("LOGFILE_GROUP_NAME"),
|
||||
bigint_column("LOGFILE_GROUP_NUMBER"),
|
||||
string_column("ENGINE"),
|
||||
string_column("FULLTEXT_KEYS"),
|
||||
bigint_column("DELETED_ROWS"),
|
||||
bigint_column("UPDATE_COUNT"),
|
||||
bigint_column("FREE_EXTENTS"),
|
||||
bigint_column("TOTAL_EXTENTS"),
|
||||
bigint_column("EXTENT_SIZE"),
|
||||
bigint_column("INITIAL_SIZE"),
|
||||
bigint_column("MAXIMUM_SIZE"),
|
||||
bigint_column("AUTOEXTEND_SIZE"),
|
||||
datetime_column("CREATION_TIME"),
|
||||
datetime_column("LAST_UPDATE_TIME"),
|
||||
datetime_column("LAST_ACCESS_TIME"),
|
||||
datetime_column("RECOVER_TIME"),
|
||||
bigint_column("TRANSACTION_COUNTER"),
|
||||
string_column("VERSION"),
|
||||
string_column("ROW_FORMAT"),
|
||||
bigint_column("TABLE_ROWS"),
|
||||
bigint_column("AVG_ROW_LENGTH"),
|
||||
bigint_column("DATA_LENGTH"),
|
||||
bigint_column("MAX_DATA_LENGTH"),
|
||||
bigint_column("INDEX_LENGTH"),
|
||||
bigint_column("DATA_FREE"),
|
||||
datetime_column("CREATE_TIME"),
|
||||
datetime_column("UPDATE_TIME"),
|
||||
datetime_column("CHECK_TIME"),
|
||||
string_column("CHECKSUM"),
|
||||
string_column("STATUS"),
|
||||
string_column("EXTRA"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
OPTIMIZER_TRACE => (
|
||||
vec![
|
||||
string_column("QUERY"),
|
||||
string_column("TRACE"),
|
||||
bigint_column("MISSING_BYTES_BEYOND_MAX_MEM_SIZE"),
|
||||
bigint_column("INSUFFICIENT_PRIVILEGES"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
// MySQL (https://dev.mysql.com/doc/refman/8.2/en/information-schema-parameters-table.html)
// has a spec that differs from
// PostgreSQL (https://www.postgresql.org/docs/current/infoschema-parameters.html).
// We follow the MySQL spec here.
|
||||
PARAMETERS => (
|
||||
vec![
|
||||
string_column("SPECIFIC_CATALOG"),
|
||||
string_column("SPECIFIC_SCHEMA"),
|
||||
string_column("SPECIFIC_NAME"),
|
||||
bigint_column("ORDINAL_POSITION"),
|
||||
string_column("PARAMETER_MODE"),
|
||||
string_column("PARAMETER_NAME"),
|
||||
string_column("DATA_TYPE"),
|
||||
bigint_column("CHARACTER_MAXIMUM_LENGTH"),
|
||||
bigint_column("CHARACTER_OCTET_LENGTH"),
|
||||
bigint_column("NUMERIC_PRECISION"),
|
||||
bigint_column("NUMERIC_SCALE"),
|
||||
bigint_column("DATETIME_PRECISION"),
|
||||
string_column("CHARACTER_SET_NAME"),
|
||||
string_column("COLLATION_NAME"),
|
||||
string_column("DTD_IDENTIFIER"),
|
||||
string_column("ROUTINE_TYPE"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
PROFILING => (
|
||||
vec![
|
||||
bigint_column("QUERY_ID"),
|
||||
bigint_column("SEQ"),
|
||||
string_column("STATE"),
|
||||
bigint_column("DURATION"),
|
||||
bigint_column("CPU_USER"),
|
||||
bigint_column("CPU_SYSTEM"),
|
||||
bigint_column("CONTEXT_VOLUNTARY"),
|
||||
bigint_column("CONTEXT_INVOLUNTARY"),
|
||||
bigint_column("BLOCK_OPS_IN"),
|
||||
bigint_column("BLOCK_OPS_OUT"),
|
||||
bigint_column("MESSAGES_SENT"),
|
||||
bigint_column("MESSAGES_RECEIVED"),
|
||||
bigint_column("PAGE_FAULTS_MAJOR"),
|
||||
bigint_column("PAGE_FAULTS_MINOR"),
|
||||
bigint_column("SWAPS"),
|
||||
string_column("SOURCE_FUNCTION"),
|
||||
string_column("SOURCE_FILE"),
|
||||
bigint_column("SOURCE_LINE"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
// TODO: _Must_ reimplement this table when foreign key constraints are supported.
|
||||
REFERENTIAL_CONSTRAINTS => (
|
||||
vec![
|
||||
string_column("CONSTRAINT_CATALOG"),
|
||||
string_column("CONSTRAINT_SCHEMA"),
|
||||
string_column("CONSTRAINT_NAME"),
|
||||
string_column("UNIQUE_CONSTRAINT_CATALOG"),
|
||||
string_column("UNIQUE_CONSTRAINT_SCHEMA"),
|
||||
string_column("UNIQUE_CONSTRAINT_NAME"),
|
||||
string_column("MATCH_OPTION"),
|
||||
string_column("UPDATE_RULE"),
|
||||
string_column("DELETE_RULE"),
|
||||
string_column("TABLE_NAME"),
|
||||
string_column("REFERENCED_TABLE_NAME"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
ROUTINES => (
|
||||
vec![
|
||||
string_column("SPECIFIC_NAME"),
|
||||
string_column("ROUTINE_CATALOG"),
|
||||
string_column("ROUTINE_SCHEMA"),
|
||||
string_column("ROUTINE_NAME"),
|
||||
string_column("ROUTINE_TYPE"),
|
||||
string_column("DATA_TYPE"),
|
||||
bigint_column("CHARACTER_MAXIMUM_LENGTH"),
|
||||
bigint_column("CHARACTER_OCTET_LENGTH"),
|
||||
bigint_column("NUMERIC_PRECISION"),
|
||||
bigint_column("NUMERIC_SCALE"),
|
||||
bigint_column("DATETIME_PRECISION"),
|
||||
string_column("CHARACTER_SET_NAME"),
|
||||
string_column("COLLATION_NAME"),
|
||||
string_column("DTD_IDENTIFIER"),
|
||||
string_column("ROUTINE_BODY"),
|
||||
string_column("ROUTINE_DEFINITION"),
|
||||
string_column("EXTERNAL_NAME"),
|
||||
string_column("EXTERNAL_LANGUAGE"),
|
||||
string_column("PARAMETER_STYLE"),
|
||||
string_column("IS_DETERMINISTIC"),
|
||||
string_column("SQL_DATA_ACCESS"),
|
||||
string_column("SQL_PATH"),
|
||||
string_column("SECURITY_TYPE"),
|
||||
datetime_column("CREATED"),
|
||||
datetime_column("LAST_ALTERED"),
|
||||
string_column("SQL_MODE"),
|
||||
string_column("ROUTINE_COMMENT"),
|
||||
string_column("DEFINER"),
|
||||
string_column("CHARACTER_SET_CLIENT"),
|
||||
string_column("COLLATION_CONNECTION"),
|
||||
string_column("DATABASE_COLLATION"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
SCHEMA_PRIVILEGES => (
|
||||
vec![
|
||||
string_column("GRANTEE"),
|
||||
string_column("TABLE_CATALOG"),
|
||||
string_column("TABLE_SCHEMA"),
|
||||
string_column("PRIVILEGE_TYPE"),
|
||||
string_column("IS_GRANTABLE"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
TABLE_PRIVILEGES => (
|
||||
vec![
|
||||
string_column("GRANTEE"),
|
||||
string_column("TABLE_CATALOG"),
|
||||
string_column("TABLE_SCHEMA"),
|
||||
string_column("TABLE_NAME"),
|
||||
string_column("PRIVILEGE_TYPE"),
|
||||
string_column("IS_GRANTABLE"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
TRIGGERS => (
|
||||
vec![
|
||||
string_column("TRIGGER_CATALOG"),
|
||||
string_column("TRIGGER_SCHEMA"),
|
||||
string_column("TRIGGER_NAME"),
|
||||
string_column("EVENT_MANIPULATION"),
|
||||
string_column("EVENT_OBJECT_CATALOG"),
|
||||
string_column("EVENT_OBJECT_SCHEMA"),
|
||||
string_column("EVENT_OBJECT_TABLE"),
|
||||
bigint_column("ACTION_ORDER"),
|
||||
string_column("ACTION_CONDITION"),
|
||||
string_column("ACTION_STATEMENT"),
|
||||
string_column("ACTION_ORIENTATION"),
|
||||
string_column("ACTION_TIMING"),
|
||||
string_column("ACTION_REFERENCE_OLD_TABLE"),
|
||||
string_column("ACTION_REFERENCE_NEW_TABLE"),
|
||||
string_column("ACTION_REFERENCE_OLD_ROW"),
|
||||
string_column("ACTION_REFERENCE_NEW_ROW"),
|
||||
datetime_column("CREATED"),
|
||||
string_column("SQL_MODE"),
|
||||
string_column("DEFINER"),
|
||||
string_column("CHARACTER_SET_CLIENT"),
|
||||
string_column("COLLATION_CONNECTION"),
|
||||
string_column("DATABASE_COLLATION"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
// TODO: Consider storing internal metrics in the `global_status` and
// `session_status` tables.
|
||||
GLOBAL_STATUS => (
|
||||
vec![
|
||||
string_column("VARIABLE_NAME"),
|
||||
string_column("VARIABLE_VALUE"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
SESSION_STATUS => (
|
||||
vec![
|
||||
string_column("VARIABLE_NAME"),
|
||||
string_column("VARIABLE_VALUE"),
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
|
||||
_ => unreachable!("Unknown table in information_schema: {}", table_name),
|
||||
};
|
||||
|
||||
(Arc::new(Schema::new(column_schemas)), columns)
|
||||
}
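// A minimal sketch of the expected behaviour for one memory table name, assuming
// the `ENGINES` constant from `table_names` and module-internal access:
#[cfg(test)]
mod get_schema_columns_sketch {
    use crate::information_schema::table_names::ENGINES;

    use super::*;

    #[test]
    fn engines_schema_matches_columns() {
        let (schema, columns) = get_schema_columns(ENGINES);

        // Every returned column vector corresponds to one column schema.
        assert_eq!(schema.column_schemas().len(), columns.len());
        // Column names are exposed in lowercase.
        assert_eq!("engine", schema.column_schemas()[0].name);
    }
}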
|
||||
|
||||
fn string_columns(names: &[&'static str]) -> Vec<ColumnSchema> {
|
||||
names.iter().map(|name| string_column(name)).collect()
|
||||
}
|
||||
|
||||
fn string_column(name: &str) -> ColumnSchema {
|
||||
ColumnSchema::new(
|
||||
str::to_lowercase(name),
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
fn bigint_column(name: &str) -> ColumnSchema {
|
||||
ColumnSchema::new(
|
||||
str::to_lowercase(name),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
fn datetime_column(name: &str) -> ColumnSchema {
|
||||
ColumnSchema::new(
|
||||
str::to_lowercase(name),
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
false,
|
||||
)
|
||||
}
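// Note that these helpers lowercase the MySQL-style uppercase names passed to them,
// so the information_schema columns are exposed with lowercase names
// (e.g. "VARIABLE_NAME" becomes "variable_name").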
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_string_columns() {
|
||||
let columns = ["a", "b", "c"];
|
||||
let column_schemas = string_columns(&columns);
|
||||
|
||||
assert_eq!(3, column_schemas.len());
|
||||
for (i, name) in columns.iter().enumerate() {
|
||||
let cs = column_schemas.get(i).unwrap();
|
||||
|
||||
assert_eq!(*name, cs.name);
|
||||
assert_eq!(ConcreteDataType::string_datatype(), cs.data_type);
|
||||
}
|
||||
}
|
||||
}
|
||||
399
src/catalog/src/information_schema/partitions.rs
Normal file
@@ -0,0 +1,399 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_PARTITIONS_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use common_time::datetime::DateTime;
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
ConstantVector, DateTimeVector, DateTimeVectorBuilder, Int64Vector, Int64VectorBuilder,
|
||||
MutableVector, StringVector, StringVectorBuilder, UInt64VectorBuilder,
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
use partition::manager::PartitionInfo;
|
||||
use partition::partition::PartitionDef;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{RegionId, ScanRequest, TableId};
|
||||
use table::metadata::{TableInfo, TableType};
|
||||
|
||||
use super::PARTITIONS;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, Result,
|
||||
UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::{InformationTable, Predicates};
|
||||
use crate::kvbackend::KvBackendCatalogManager;
|
||||
use crate::CatalogManager;
|
||||
|
||||
const TABLE_CATALOG: &str = "table_catalog";
|
||||
const TABLE_SCHEMA: &str = "table_schema";
|
||||
const TABLE_NAME: &str = "table_name";
|
||||
const PARTITION_NAME: &str = "partition_name";
|
||||
const PARTITION_EXPRESSION: &str = "partition_expression";
|
||||
/// The region id
|
||||
const GREPTIME_PARTITION_ID: &str = "greptime_partition_id";
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
/// The `PARTITIONS` table provides information about partitioned tables.
|
||||
/// See <https://dev.mysql.com/doc/refman/8.0/en/information-schema-partitions-table.html>
/// We provide an extra column `greptime_partition_id` for the GreptimeDB region id.
|
||||
pub(super) struct InformationSchemaPartitions {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
}
|
||||
|
||||
impl InformationSchemaPartitions {
|
||||
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(PARTITION_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(
|
||||
"subpartition_name",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"partition_ordinal_position",
|
||||
ConcreteDataType::int64_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"subpartition_ordinal_position",
|
||||
ConcreteDataType::int64_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"partition_method",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"subpartition_method",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
PARTITION_EXPRESSION,
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"subpartition_expression",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"partition_description",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new("table_rows", ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new("avg_row_length", ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new("data_length", ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new("max_data_length", ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new("index_length", ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new("data_free", ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new("create_time", ConcreteDataType::datetime_datatype(), true),
|
||||
ColumnSchema::new("update_time", ConcreteDataType::datetime_datatype(), true),
|
||||
ColumnSchema::new("check_time", ConcreteDataType::datetime_datatype(), true),
|
||||
ColumnSchema::new("checksum", ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new(
|
||||
"partition_comment",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new("nodegroup", ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new("tablespace_name", ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(
|
||||
GREPTIME_PARTITION_ID,
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
true,
|
||||
),
|
||||
]))
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaPartitionsBuilder {
|
||||
InformationSchemaPartitionsBuilder::new(
|
||||
self.schema.clone(),
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for InformationSchemaPartitions {
|
||||
fn table_id(&self) -> TableId {
|
||||
INFORMATION_SCHEMA_PARTITIONS_TABLE_ID
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
PARTITIONS
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_partitions(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
struct InformationSchemaPartitionsBuilder {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
|
||||
catalog_names: StringVectorBuilder,
|
||||
schema_names: StringVectorBuilder,
|
||||
table_names: StringVectorBuilder,
|
||||
partition_names: StringVectorBuilder,
|
||||
partition_ordinal_positions: Int64VectorBuilder,
|
||||
partition_expressions: StringVectorBuilder,
|
||||
create_times: DateTimeVectorBuilder,
|
||||
partition_ids: UInt64VectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaPartitionsBuilder {
|
||||
fn new(
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
catalog_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_expressions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
create_times: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.partitions` virtual table
|
||||
async fn make_partitions(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
|
||||
let catalog_name = self.catalog_name.clone();
|
||||
let catalog_manager = self
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
|
||||
let partition_manager = catalog_manager
|
||||
.as_any()
|
||||
.downcast_ref::<KvBackendCatalogManager>()
|
||||
.map(|catalog_manager| catalog_manager.partition_manager());
|
||||
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name).await;
|
||||
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let table_info = table.table_info();
|
||||
|
||||
if table_info.table_type == TableType::Temporary {
|
||||
continue;
|
||||
}
|
||||
|
||||
let table_id = table_info.ident.table_id;
|
||||
let partitions = if let Some(partition_manager) = &partition_manager {
|
||||
partition_manager
|
||||
.find_table_partitions(table_id)
|
||||
.await
|
||||
.context(FindPartitionsSnafu {
|
||||
table: &table_info.name,
|
||||
})?
|
||||
} else {
|
||||
// The current node must be a standalone instance, which contains only one partition by default.
|
||||
// TODO(dennis): change it when we support multi-regions for standalone.
|
||||
vec![PartitionInfo {
|
||||
id: RegionId::new(table_id, 0),
|
||||
partition: PartitionDef::new(vec![], vec![]),
|
||||
}]
|
||||
};
|
||||
|
||||
self.add_partitions(
|
||||
&predicates,
|
||||
&table_info,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_info.name,
|
||||
&partitions,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn add_partitions(
|
||||
&mut self,
|
||||
predicates: &Predicates,
|
||||
table_info: &TableInfo,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
table_name: &str,
|
||||
partitions: &[PartitionInfo],
|
||||
) {
|
||||
let row = [
|
||||
(TABLE_CATALOG, &Value::from(catalog_name)),
|
||||
(TABLE_SCHEMA, &Value::from(schema_name)),
|
||||
(TABLE_NAME, &Value::from(table_name)),
|
||||
];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (index, partition) in partitions.iter().enumerate() {
|
||||
let partition_name = format!("p{index}");
|
||||
|
||||
self.catalog_names.push(Some(catalog_name));
|
||||
self.schema_names.push(Some(schema_name));
|
||||
self.table_names.push(Some(table_name));
|
||||
self.partition_names.push(Some(&partition_name));
|
||||
self.partition_ordinal_positions
|
||||
.push(Some((index + 1) as i64));
|
||||
let expressions = if partition.partition.partition_columns().is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(partition.partition.to_string())
|
||||
};
|
||||
|
||||
self.partition_expressions.push(expressions.as_deref());
|
||||
self.create_times.push(Some(DateTime::from(
|
||||
table_info.meta.created_on.timestamp_millis(),
|
||||
)));
|
||||
self.partition_ids.push(Some(partition.id.as_u64()));
|
||||
}
|
||||
}
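// Example output of `add_partitions` (a sketch): a table with two regions yields
//   partition_name = "p0", partition_ordinal_position = 1, greptime_partition_id = <region 0 id>
//   partition_name = "p1", partition_ordinal_position = 2, greptime_partition_id = <region 1 id>
// while most of the remaining MySQL-compatible columns are filled with NULLs in `finish`.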
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let rows_num = self.catalog_names.len();
|
||||
|
||||
let null_string_vector = Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from(vec![None as Option<&str>])),
|
||||
rows_num,
|
||||
));
|
||||
let null_i64_vector = Arc::new(ConstantVector::new(
|
||||
Arc::new(Int64Vector::from(vec![None])),
|
||||
rows_num,
|
||||
));
|
||||
let null_datetime_vector = Arc::new(ConstantVector::new(
|
||||
Arc::new(DateTimeVector::from(vec![None])),
|
||||
rows_num,
|
||||
));
|
||||
let partition_methods = Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from(vec![Some("RANGE")])),
|
||||
rows_num,
|
||||
));
|
||||
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
Arc::new(self.catalog_names.finish()),
|
||||
Arc::new(self.schema_names.finish()),
|
||||
Arc::new(self.table_names.finish()),
|
||||
Arc::new(self.partition_names.finish()),
|
||||
null_string_vector.clone(),
|
||||
Arc::new(self.partition_ordinal_positions.finish()),
|
||||
null_i64_vector.clone(),
|
||||
partition_methods,
|
||||
null_string_vector.clone(),
|
||||
Arc::new(self.partition_expressions.finish()),
|
||||
null_string_vector.clone(),
|
||||
null_string_vector.clone(),
|
||||
// TODO(dennis): rows and index statistics info
|
||||
null_i64_vector.clone(),
|
||||
null_i64_vector.clone(),
|
||||
null_i64_vector.clone(),
|
||||
null_i64_vector.clone(),
|
||||
null_i64_vector.clone(),
|
||||
null_i64_vector.clone(),
|
||||
Arc::new(self.create_times.finish()),
|
||||
// TODO(dennis): supports update_time
|
||||
null_datetime_vector.clone(),
|
||||
null_datetime_vector,
|
||||
null_i64_vector,
|
||||
null_string_vector.clone(),
|
||||
null_string_vector.clone(),
|
||||
null_string_vector,
|
||||
Arc::new(self.partition_ids.finish()),
|
||||
];
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl DfPartitionStream for InformationSchemaPartitions {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_partitions(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
609
src/catalog/src/information_schema/predicate.rs
Normal file
@@ -0,0 +1,609 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::array::StringArray;
|
||||
use arrow::compute::kernels::comparison;
|
||||
use common_query::logical_plan::DfExpr;
|
||||
use datafusion::common::ScalarValue;
|
||||
use datafusion::logical_expr::expr::Like;
|
||||
use datafusion::logical_expr::Operator;
|
||||
use datatypes::value::Value;
|
||||
use store_api::storage::ScanRequest;
|
||||
|
||||
type ColumnName = String;
|
||||
/// Predicate to filter the `information_schema` table streams;
/// only these simple predicate forms are supported currently.
/// TODO(dennis): support more predicate types.
|
||||
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||
enum Predicate {
|
||||
Eq(ColumnName, Value),
|
||||
Like(ColumnName, String, bool),
|
||||
NotEq(ColumnName, Value),
|
||||
InList(ColumnName, Vec<Value>),
|
||||
And(Box<Predicate>, Box<Predicate>),
|
||||
Or(Box<Predicate>, Box<Predicate>),
|
||||
Not(Box<Predicate>),
|
||||
}
|
||||
|
||||
impl Predicate {
|
||||
/// Evaluates the predicate against the row, returning:
/// - `None` when the predicate can't be evaluated with the row,
/// - `Some(true)` when the predicate is satisfied,
/// - `Some(false)` when the predicate is not satisfied.
|
||||
fn eval(&self, row: &[(&str, &Value)]) -> Option<bool> {
|
||||
match self {
|
||||
Predicate::Eq(c, v) => {
|
||||
for (column, value) in row {
|
||||
if c != column {
|
||||
continue;
|
||||
}
|
||||
return Some(v == *value);
|
||||
}
|
||||
}
|
||||
Predicate::Like(c, pattern, case_insensitive) => {
|
||||
for (column, value) in row {
|
||||
if c != column {
|
||||
continue;
|
||||
}
|
||||
|
||||
let Value::String(bs) = value else {
|
||||
continue;
|
||||
};
|
||||
|
||||
return like_utf8(bs.as_utf8(), pattern, case_insensitive);
|
||||
}
|
||||
}
|
||||
Predicate::NotEq(c, v) => {
|
||||
for (column, value) in row {
|
||||
if c != column {
|
||||
continue;
|
||||
}
|
||||
return Some(v != *value);
|
||||
}
|
||||
}
|
||||
Predicate::InList(c, values) => {
|
||||
for (column, value) in row {
|
||||
if c != column {
|
||||
continue;
|
||||
}
|
||||
return Some(values.iter().any(|v| v == *value));
|
||||
}
|
||||
}
|
||||
Predicate::And(left, right) => {
|
||||
let left = left.eval(row);
|
||||
|
||||
// short-circuit
|
||||
if matches!(left, Some(false)) {
|
||||
return Some(false);
|
||||
}
|
||||
|
||||
return match (left, right.eval(row)) {
|
||||
(Some(left), Some(right)) => Some(left && right),
|
||||
(None, Some(false)) => Some(false),
|
||||
_ => None,
|
||||
};
|
||||
}
|
||||
Predicate::Or(left, right) => {
|
||||
let left = left.eval(row);
|
||||
|
||||
// short-circuit
|
||||
if matches!(left, Some(true)) {
|
||||
return Some(true);
|
||||
}
|
||||
|
||||
return match (left, right.eval(row)) {
|
||||
(Some(left), Some(right)) => Some(left || right),
|
||||
(None, Some(true)) => Some(true),
|
||||
_ => None,
|
||||
};
|
||||
}
|
||||
Predicate::Not(p) => {
|
||||
let Some(b) = p.eval(row) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
return Some(!b);
|
||||
}
|
||||
}
|
||||
|
||||
// Can't evaluate predicate with the row
|
||||
None
|
||||
}
|
||||
|
||||
/// Tries to create a predicate from a DataFusion [`Expr`], returning `None` on failure.
|
||||
fn from_expr(expr: DfExpr) -> Option<Predicate> {
|
||||
match expr {
|
||||
// NOT expr
|
||||
DfExpr::Not(expr) => {
|
||||
let Some(p) = Self::from_expr(*expr) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some(Predicate::Not(Box::new(p)))
|
||||
}
|
||||
// expr LIKE pattern
|
||||
DfExpr::Like(Like {
|
||||
negated,
|
||||
expr,
|
||||
pattern,
|
||||
case_insensitive,
|
||||
..
|
||||
}) if is_column(&expr) && is_string_literal(&pattern) => {
|
||||
// Safety: ensured by the guard above
|
||||
let DfExpr::Column(c) = *expr else {
|
||||
unreachable!();
|
||||
};
|
||||
let DfExpr::Literal(ScalarValue::Utf8(Some(pattern))) = *pattern else {
|
||||
unreachable!();
|
||||
};
|
||||
|
||||
let p = Predicate::Like(c.name, pattern, case_insensitive);
|
||||
|
||||
if negated {
|
||||
Some(Predicate::Not(Box::new(p)))
|
||||
} else {
|
||||
Some(p)
|
||||
}
|
||||
}
|
||||
// left OP right
|
||||
DfExpr::BinaryExpr(bin) => match (*bin.left, bin.op, *bin.right) {
|
||||
// left == right
|
||||
(DfExpr::Literal(scalar), Operator::Eq, DfExpr::Column(c))
|
||||
| (DfExpr::Column(c), Operator::Eq, DfExpr::Literal(scalar)) => {
|
||||
let Ok(v) = Value::try_from(scalar) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some(Predicate::Eq(c.name, v))
|
||||
}
|
||||
// left != right
|
||||
(DfExpr::Literal(scalar), Operator::NotEq, DfExpr::Column(c))
|
||||
| (DfExpr::Column(c), Operator::NotEq, DfExpr::Literal(scalar)) => {
|
||||
let Ok(v) = Value::try_from(scalar) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some(Predicate::NotEq(c.name, v))
|
||||
}
|
||||
// left AND right
|
||||
(left, Operator::And, right) => {
|
||||
let Some(left) = Self::from_expr(left) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let Some(right) = Self::from_expr(right) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some(Predicate::And(Box::new(left), Box::new(right)))
|
||||
}
|
||||
// left OR right
|
||||
(left, Operator::Or, right) => {
|
||||
let Some(left) = Self::from_expr(left) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let Some(right) = Self::from_expr(right) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some(Predicate::Or(Box::new(left), Box::new(right)))
|
||||
}
|
||||
_ => None,
|
||||
},
|
||||
// [NOT] IN (LIST)
|
||||
DfExpr::InList(list) => {
|
||||
match (*list.expr, list.list, list.negated) {
|
||||
// column [NOT] IN (v1, v2, v3, ...)
|
||||
(DfExpr::Column(c), list, negated) if is_all_scalars(&list) => {
|
||||
let mut values = Vec::with_capacity(list.len());
|
||||
for scalar in list {
|
||||
// Safety: checked by `is_all_scalars`
|
||||
let DfExpr::Literal(scalar) = scalar else {
|
||||
unreachable!();
|
||||
};
|
||||
|
||||
let Ok(value) = Value::try_from(scalar) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
values.push(value);
|
||||
}
|
||||
|
||||
let predicate = Predicate::InList(c.name, values);
|
||||
|
||||
if negated {
|
||||
Some(Predicate::Not(Box::new(predicate)))
|
||||
} else {
|
||||
Some(predicate)
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
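A minimal sketch (not part of the diff) of the tri-state `eval` contract described above; the column names and values here are made up for illustration:

let p = Predicate::Eq("table_name".to_string(), Value::from("monitor"));
assert_eq!(p.eval(&[("table_name", &Value::from("monitor"))]), Some(true));
assert_eq!(p.eval(&[("table_name", &Value::from("metrics"))]), Some(false));
// The row doesn't carry the column at all, so the predicate can't be evaluated.
assert_eq!(p.eval(&[("engine", &Value::from("mito"))]), None);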
|
||||
|
||||
/// Performs SQL `left LIKE right`, returning `None` if it fails to evaluate.
/// - `s`: the target string.
/// - `pattern`: the LIKE pattern, e.g. `'%abc'`.
/// - `case_insensitive`: whether to perform a case-insensitive match.
|
||||
fn like_utf8(s: &str, pattern: &str, case_insensitive: &bool) -> Option<bool> {
|
||||
let array = StringArray::from(vec![s]);
|
||||
let patterns = StringArray::new_scalar(pattern);
|
||||
|
||||
let Ok(booleans) = (if *case_insensitive {
|
||||
comparison::ilike(&array, &patterns)
|
||||
} else {
|
||||
comparison::like(&array, &patterns)
|
||||
}) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
// Safety: at least one value in result
|
||||
Some(booleans.value(0))
|
||||
}
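For instance (an illustrative sketch mirroring the test data further below), a case-insensitive `%abc` pattern matches "hello AbC" while the case-sensitive variant does not:

assert_eq!(like_utf8("hello AbC", "%abc", &true), Some(true));   // ILIKE
assert_eq!(like_utf8("hello AbC", "%abc", &false), Some(false)); // LIKE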
|
||||
|
||||
fn is_string_literal(expr: &DfExpr) -> bool {
|
||||
matches!(expr, DfExpr::Literal(ScalarValue::Utf8(Some(_))))
|
||||
}
|
||||
|
||||
fn is_column(expr: &DfExpr) -> bool {
|
||||
matches!(expr, DfExpr::Column(_))
|
||||
}
|
||||
|
||||
/// A list of predicates.
|
||||
pub struct Predicates {
|
||||
predicates: Vec<Predicate>,
|
||||
}
|
||||
|
||||
impl Predicates {
|
||||
/// Tries its best to create predicates from the [`ScanRequest`], ignoring filters it can't convert.
|
||||
pub fn from_scan_request(request: &Option<ScanRequest>) -> Predicates {
|
||||
if let Some(request) = request {
|
||||
let mut predicates = Vec::with_capacity(request.filters.len());
|
||||
|
||||
for filter in &request.filters {
|
||||
if let Some(predicate) = Predicate::from_expr(filter.df_expr().clone()) {
|
||||
predicates.push(predicate);
|
||||
}
|
||||
}
|
||||
|
||||
Self { predicates }
|
||||
} else {
|
||||
Self {
|
||||
predicates: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Evaluates the predicates against the row.
/// Returns `true` when all the predicates are satisfied or can't be evaluated.
|
||||
pub fn eval(&self, row: &[(&str, &Value)]) -> bool {
|
||||
// fast path
|
||||
if self.predicates.is_empty() {
|
||||
return true;
|
||||
}
|
||||
|
||||
self.predicates
|
||||
.iter()
|
||||
.filter_map(|p| p.eval(row))
|
||||
.all(|b| b)
|
||||
}
|
||||
}
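The table builders introduced in this PR use it roughly like this (a sketch; `request` and the row content are placeholders):

let predicates = Predicates::from_scan_request(&request);
let row = [("schema_name", &Value::from("public"))];
if !predicates.eval(&row) {
    // The pushed-down filters reject this row, so skip it.
    return;
}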
|
||||
|
||||
/// Returns true when the values are all [`DfExpr::Literal`].
|
||||
fn is_all_scalars(list: &[DfExpr]) -> bool {
|
||||
list.iter().all(|v| matches!(v, DfExpr::Literal(_)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use datafusion::common::{Column, ScalarValue};
|
||||
use datafusion::logical_expr::expr::InList;
|
||||
use datafusion::logical_expr::BinaryExpr;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_predicate_eval() {
|
||||
let a_col = "a".to_string();
|
||||
let b_col = "b".to_string();
|
||||
let a_value = Value::from("a_value");
|
||||
let b_value = Value::from("b_value");
|
||||
let wrong_value = Value::from("wrong_value");
|
||||
|
||||
let a_row = [(a_col.as_str(), &a_value)];
|
||||
let b_row = [("b", &wrong_value)];
|
||||
let wrong_row = [(a_col.as_str(), &wrong_value)];
|
||||
|
||||
// Predicate::Eq
|
||||
let p = Predicate::Eq(a_col.clone(), a_value.clone());
|
||||
assert!(p.eval(&a_row).unwrap());
|
||||
assert!(p.eval(&b_row).is_none());
|
||||
assert!(!p.eval(&wrong_row).unwrap());
|
||||
|
||||
// Predicate::NotEq
|
||||
let p = Predicate::NotEq(a_col.clone(), a_value.clone());
|
||||
assert!(!p.eval(&a_row).unwrap());
|
||||
assert!(p.eval(&b_row).is_none());
|
||||
assert!(p.eval(&wrong_row).unwrap());
|
||||
|
||||
// Predicate::InList
|
||||
let p = Predicate::InList(a_col.clone(), vec![a_value.clone(), b_value.clone()]);
|
||||
assert!(p.eval(&a_row).unwrap());
|
||||
assert!(p.eval(&b_row).is_none());
|
||||
assert!(!p.eval(&wrong_row).unwrap());
|
||||
assert!(p.eval(&[(&a_col, &b_value)]).unwrap());
|
||||
|
||||
let p1 = Predicate::Eq(a_col.clone(), a_value.clone());
|
||||
let p2 = Predicate::Eq(b_col.clone(), b_value.clone());
|
||||
let row = [(a_col.as_str(), &a_value), (b_col.as_str(), &b_value)];
|
||||
let wrong_row = [(a_col.as_str(), &a_value), (b_col.as_str(), &wrong_value)];
|
||||
|
||||
// Predicate::And
|
||||
let p = Predicate::And(Box::new(p1.clone()), Box::new(p2.clone()));
|
||||
assert!(p.eval(&row).unwrap());
|
||||
assert!(!p.eval(&wrong_row).unwrap());
|
||||
assert!(p.eval(&[]).is_none());
|
||||
assert!(p.eval(&[("c", &a_value)]).is_none());
|
||||
assert!(!p
|
||||
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &a_value)])
|
||||
.unwrap());
|
||||
assert!(!p
|
||||
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &b_value)])
|
||||
.unwrap());
|
||||
assert!(p
|
||||
.eval(&[(a_col.as_ref(), &a_value), ("c", &a_value)])
|
||||
.is_none());
|
||||
assert!(!p
|
||||
.eval(&[(a_col.as_ref(), &b_value), ("c", &a_value)])
|
||||
.unwrap());
|
||||
|
||||
// Predicate::Or
|
||||
let p = Predicate::Or(Box::new(p1), Box::new(p2));
|
||||
assert!(p.eval(&row).unwrap());
|
||||
assert!(p.eval(&wrong_row).unwrap());
|
||||
assert!(p.eval(&[]).is_none());
|
||||
assert!(p.eval(&[("c", &a_value)]).is_none());
|
||||
assert!(!p
|
||||
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &a_value)])
|
||||
.unwrap());
|
||||
assert!(p
|
||||
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &b_value)])
|
||||
.unwrap());
|
||||
assert!(p
|
||||
.eval(&[(a_col.as_ref(), &a_value), ("c", &a_value)])
|
||||
.unwrap());
|
||||
assert!(p
|
||||
.eval(&[(a_col.as_ref(), &b_value), ("c", &a_value)])
|
||||
.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predicate_like() {
|
||||
// case insensitive
|
||||
let expr = DfExpr::Like(Like {
|
||||
negated: false,
|
||||
expr: Box::new(column("a")),
|
||||
pattern: Box::new(string_literal("%abc")),
|
||||
case_insensitive: true,
|
||||
escape_char: None,
|
||||
});
|
||||
|
||||
let p = Predicate::from_expr(expr).unwrap();
|
||||
assert!(
|
||||
matches!(&p, Predicate::Like(c, pattern, case_insensitive) if
|
||||
c == "a"
|
||||
&& pattern == "%abc"
|
||||
&& *case_insensitive)
|
||||
);
|
||||
|
||||
let match_row = [
|
||||
("a", &Value::from("hello AbC")),
|
||||
("b", &Value::from("b value")),
|
||||
];
|
||||
let unmatch_row = [("a", &Value::from("bca")), ("b", &Value::from("b value"))];
|
||||
|
||||
assert!(p.eval(&match_row).unwrap());
|
||||
assert!(!p.eval(&unmatch_row).unwrap());
|
||||
assert!(p.eval(&[]).is_none());
|
||||
|
||||
// case sensitive
|
||||
let expr = DfExpr::Like(Like {
|
||||
negated: false,
|
||||
expr: Box::new(column("a")),
|
||||
pattern: Box::new(string_literal("%abc")),
|
||||
case_insensitive: false,
|
||||
escape_char: None,
|
||||
});
|
||||
|
||||
let p = Predicate::from_expr(expr).unwrap();
|
||||
assert!(
|
||||
matches!(&p, Predicate::Like(c, pattern, case_insensitive) if
|
||||
c == "a"
|
||||
&& pattern == "%abc"
|
||||
&& !*case_insensitive)
|
||||
);
|
||||
assert!(!p.eval(&match_row).unwrap());
|
||||
assert!(!p.eval(&unmatch_row).unwrap());
|
||||
assert!(p.eval(&[]).is_none());
|
||||
|
||||
// not like
|
||||
let expr = DfExpr::Like(Like {
|
||||
negated: true,
|
||||
expr: Box::new(column("a")),
|
||||
pattern: Box::new(string_literal("%abc")),
|
||||
case_insensitive: true,
|
||||
escape_char: None,
|
||||
});
|
||||
|
||||
let p = Predicate::from_expr(expr).unwrap();
|
||||
assert!(!p.eval(&match_row).unwrap());
|
||||
assert!(p.eval(&unmatch_row).unwrap());
|
||||
assert!(p.eval(&[]).is_none());
|
||||
}
|
||||
|
||||
fn column(name: &str) -> DfExpr {
|
||||
DfExpr::Column(Column {
|
||||
relation: None,
|
||||
name: name.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
fn string_literal(v: &str) -> DfExpr {
|
||||
DfExpr::Literal(ScalarValue::Utf8(Some(v.to_string())))
|
||||
}
|
||||
|
||||
fn match_string_value(v: &Value, expected: &str) -> bool {
|
||||
matches!(v, Value::String(bs) if bs.as_utf8() == expected)
|
||||
}
|
||||
|
||||
fn match_string_values(vs: &[Value], expected: &[&str]) -> bool {
|
||||
assert_eq!(vs.len(), expected.len());
|
||||
|
||||
let mut result = true;
|
||||
for (i, v) in vs.iter().enumerate() {
|
||||
result = result && match_string_value(v, expected[i]);
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
fn mock_exprs() -> (DfExpr, DfExpr) {
|
||||
let expr1 = DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(column("a")),
|
||||
op: Operator::Eq,
|
||||
right: Box::new(string_literal("a_value")),
|
||||
});
|
||||
|
||||
let expr2 = DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(column("b")),
|
||||
op: Operator::NotEq,
|
||||
right: Box::new(string_literal("b_value")),
|
||||
});
|
||||
|
||||
(expr1, expr2)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predicate_from_expr() {
|
||||
let (expr1, expr2) = mock_exprs();
|
||||
|
||||
let p1 = Predicate::from_expr(expr1.clone()).unwrap();
|
||||
assert!(matches!(&p1, Predicate::Eq(column, v) if column == "a"
|
||||
&& match_string_value(v, "a_value")));
|
||||
|
||||
let p2 = Predicate::from_expr(expr2.clone()).unwrap();
|
||||
assert!(matches!(&p2, Predicate::NotEq(column, v) if column == "b"
|
||||
&& match_string_value(v, "b_value")));
|
||||
|
||||
let and_expr = DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(expr1.clone()),
|
||||
op: Operator::And,
|
||||
right: Box::new(expr2.clone()),
|
||||
});
|
||||
let or_expr = DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(expr1.clone()),
|
||||
op: Operator::Or,
|
||||
right: Box::new(expr2.clone()),
|
||||
});
|
||||
let not_expr = DfExpr::Not(Box::new(expr1.clone()));
|
||||
|
||||
let and_p = Predicate::from_expr(and_expr).unwrap();
|
||||
assert!(matches!(and_p, Predicate::And(left, right) if *left == p1 && *right == p2));
|
||||
let or_p = Predicate::from_expr(or_expr).unwrap();
|
||||
assert!(matches!(or_p, Predicate::Or(left, right) if *left == p1 && *right == p2));
|
||||
let not_p = Predicate::from_expr(not_expr).unwrap();
|
||||
assert!(matches!(not_p, Predicate::Not(p) if *p == p1));
|
||||
|
||||
let inlist_expr = DfExpr::InList(InList {
|
||||
expr: Box::new(column("a")),
|
||||
list: vec![string_literal("a1"), string_literal("a2")],
|
||||
negated: false,
|
||||
});
|
||||
|
||||
let inlist_p = Predicate::from_expr(inlist_expr).unwrap();
|
||||
assert!(matches!(&inlist_p, Predicate::InList(c, values) if c == "a"
|
||||
&& match_string_values(values, &["a1", "a2"])));
|
||||
|
||||
let inlist_expr = DfExpr::InList(InList {
|
||||
expr: Box::new(column("a")),
|
||||
list: vec![string_literal("a1"), string_literal("a2")],
|
||||
negated: true,
|
||||
});
|
||||
let inlist_p = Predicate::from_expr(inlist_expr).unwrap();
|
||||
assert!(matches!(inlist_p, Predicate::Not(p) if
|
||||
matches!(&*p,
|
||||
Predicate::InList(c, values) if c == "a"
|
||||
&& match_string_values(values, &["a1", "a2"]))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predicates_from_scan_request() {
|
||||
let predicates = Predicates::from_scan_request(&None);
|
||||
assert!(predicates.predicates.is_empty());
|
||||
|
||||
let (expr1, expr2) = mock_exprs();
|
||||
|
||||
let request = ScanRequest {
|
||||
filters: vec![expr1.into(), expr2.into()],
|
||||
..Default::default()
|
||||
};
|
||||
let predicates = Predicates::from_scan_request(&Some(request));
|
||||
|
||||
assert_eq!(2, predicates.predicates.len());
|
||||
assert!(
|
||||
matches!(&predicates.predicates[0], Predicate::Eq(column, v) if column == "a"
|
||||
&& match_string_value(v, "a_value"))
|
||||
);
|
||||
assert!(
|
||||
matches!(&predicates.predicates[1], Predicate::NotEq(column, v) if column == "b"
|
||||
&& match_string_value(v, "b_value"))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_predicates_eval_row() {
|
||||
let wrong_row = [
|
||||
("a", &Value::from("a_value")),
|
||||
("b", &Value::from("b_value")),
|
||||
("c", &Value::from("c_value")),
|
||||
];
|
||||
let row = [
|
||||
("a", &Value::from("a_value")),
|
||||
("b", &Value::from("not_b_value")),
|
||||
("c", &Value::from("c_value")),
|
||||
];
|
||||
let c_row = [("c", &Value::from("c_value"))];
|
||||
|
||||
// test empty predicates, always returns true
|
||||
let predicates = Predicates::from_scan_request(&None);
|
||||
assert!(predicates.eval(&row));
|
||||
assert!(predicates.eval(&wrong_row));
|
||||
assert!(predicates.eval(&c_row));
|
||||
|
||||
let (expr1, expr2) = mock_exprs();
|
||||
let request = ScanRequest {
|
||||
filters: vec![expr1.into(), expr2.into()],
|
||||
..Default::default()
|
||||
};
|
||||
let predicates = Predicates::from_scan_request(&Some(request));
|
||||
assert!(predicates.eval(&row));
|
||||
assert!(!predicates.eval(&wrong_row));
|
||||
assert!(predicates.eval(&c_row));
|
||||
}
|
||||
}
|
||||
279 src/catalog/src/information_schema/region_peers.rs (new file)
@@ -0,0 +1,279 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use core::pin::pin;
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_REGION_PEERS_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::rpc::router::RegionRoute;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{Int64VectorBuilder, StringVectorBuilder, UInt64VectorBuilder};
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
use table::metadata::TableType;
|
||||
|
||||
use super::REGION_PEERS;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, FindRegionRoutesSnafu, InternalSnafu, Result,
|
||||
UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::{InformationTable, Predicates};
|
||||
use crate::kvbackend::KvBackendCatalogManager;
|
||||
use crate::CatalogManager;
|
||||
|
||||
const REGION_ID: &str = "region_id";
|
||||
const PEER_ID: &str = "peer_id";
|
||||
const PEER_ADDR: &str = "peer_addr";
|
||||
const IS_LEADER: &str = "is_leader";
|
||||
const STATUS: &str = "status";
|
||||
const DOWN_SECONDS: &str = "down_seconds";
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
/// The `REGION_PEERS` table provides information about the region distribution and routes, including the following fields:
|
||||
///
|
||||
/// - `region_id`: the region id
|
||||
/// - `peer_id`: the region storage datanode peer id
|
||||
/// - `peer_addr`: the region storage datanode peer address
|
||||
/// - `is_leader`: whether the peer is the leader
|
||||
/// - `status`: the region status, `ALIVE` or `DOWNGRADED`.
|
||||
/// - `down_seconds`: the duration of being offline, in seconds.
|
||||
///
|
||||
pub(super) struct InformationSchemaRegionPeers {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
}
|
||||
|
||||
impl InformationSchemaRegionPeers {
|
||||
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(REGION_ID, ConcreteDataType::uint64_datatype(), false),
|
||||
ColumnSchema::new(PEER_ID, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(PEER_ADDR, ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(IS_LEADER, ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(STATUS, ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(DOWN_SECONDS, ConcreteDataType::int64_datatype(), true),
|
||||
]))
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaRegionPeersBuilder {
|
||||
InformationSchemaRegionPeersBuilder::new(
|
||||
self.schema.clone(),
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for InformationSchemaRegionPeers {
|
||||
fn table_id(&self) -> TableId {
|
||||
INFORMATION_SCHEMA_REGION_PEERS_TABLE_ID
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
REGION_PEERS
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_region_peers(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
struct InformationSchemaRegionPeersBuilder {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
|
||||
region_ids: UInt64VectorBuilder,
|
||||
peer_ids: UInt64VectorBuilder,
|
||||
peer_addrs: StringVectorBuilder,
|
||||
is_leaders: StringVectorBuilder,
|
||||
statuses: StringVectorBuilder,
|
||||
down_seconds: Int64VectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaRegionPeersBuilder {
|
||||
fn new(
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
region_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
peer_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
peer_addrs: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
is_leaders: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
statuses: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
down_seconds: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.region_peers` virtual table
|
||||
async fn make_region_peers(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
|
||||
let catalog_name = self.catalog_name.clone();
|
||||
let catalog_manager = self
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
|
||||
let partition_manager = catalog_manager
|
||||
.as_any()
|
||||
.downcast_ref::<KvBackendCatalogManager>()
|
||||
.map(|catalog_manager| catalog_manager.partition_manager());
|
||||
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
let table_id_stream = catalog_manager
|
||||
.tables(&catalog_name, &schema_name)
|
||||
.await
|
||||
.try_filter_map(|t| async move {
|
||||
let table_info = t.table_info();
|
||||
if table_info.table_type == TableType::Temporary {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some(table_info.ident.table_id))
|
||||
}
|
||||
});
|
||||
|
||||
const BATCH_SIZE: usize = 128;
|
||||
|
||||
// Split table ids into chunks
|
||||
let mut table_id_chunks = pin!(table_id_stream.ready_chunks(BATCH_SIZE));
|
||||
|
||||
while let Some(table_ids) = table_id_chunks.next().await {
|
||||
let table_ids = table_ids.into_iter().collect::<Result<Vec<_>>>()?;
|
||||
|
||||
let table_routes = if let Some(partition_manager) = &partition_manager {
|
||||
partition_manager
|
||||
.find_region_routes_batch(&table_ids)
|
||||
.await
|
||||
.context(FindRegionRoutesSnafu)?
|
||||
} else {
|
||||
table_ids.into_iter().map(|id| (id, vec![])).collect()
|
||||
};
|
||||
|
||||
for routes in table_routes.values() {
|
||||
self.add_region_peers(&predicates, routes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
fn add_region_peers(&mut self, predicates: &Predicates, routes: &[RegionRoute]) {
|
||||
for route in routes {
|
||||
let region_id = route.region.id.as_u64();
|
||||
let peer_id = route.leader_peer.clone().map(|p| p.id);
|
||||
let peer_addr = route.leader_peer.clone().map(|p| p.addr);
|
||||
let status = if let Some(status) = route.leader_status {
|
||||
Some(status.as_ref().to_string())
|
||||
} else {
|
||||
// Alive by default
|
||||
Some("ALIVE".to_string())
|
||||
};
|
||||
|
||||
let row = [(REGION_ID, &Value::from(region_id))];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO(dennis): adds followers.
|
||||
self.region_ids.push(Some(region_id));
|
||||
self.peer_ids.push(peer_id);
|
||||
self.peer_addrs.push(peer_addr.as_deref());
|
||||
self.is_leaders.push(Some("Yes"));
|
||||
self.statuses.push(status.as_deref());
|
||||
self.down_seconds
|
||||
.push(route.leader_down_millis().map(|m| m / 1000));
|
||||
}
|
||||
}
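As a sketch of the filtering above: only `region_id` takes part in predicate evaluation here, so filters on the other columns can't be evaluated against the row and the row is kept; the region id below is an arbitrary example:

let row = [(REGION_ID, &Value::from(4398046511104u64))];
assert!(predicates.eval(&row)); // true unless a region_id filter rejects this region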
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
Arc::new(self.region_ids.finish()),
|
||||
Arc::new(self.peer_ids.finish()),
|
||||
Arc::new(self.peer_addrs.finish()),
|
||||
Arc::new(self.is_leaders.finish()),
|
||||
Arc::new(self.statuses.finish()),
|
||||
Arc::new(self.down_seconds.finish()),
|
||||
];
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl DfPartitionStream for InformationSchemaRegionPeers {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_region_peers(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
250 src/catalog/src/information_schema/runtime_metrics.rs (new file)
@@ -0,0 +1,250 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use common_time::util::current_time_millis;
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, MutableVector};
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::vectors::{
|
||||
ConstantVector, Float64VectorBuilder, StringVector, StringVectorBuilder,
|
||||
TimestampMillisecondVector, VectorRef,
|
||||
};
|
||||
use itertools::Itertools;
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::{InformationTable, RUNTIME_METRICS};
|
||||
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
|
||||
|
||||
pub(super) struct InformationSchemaMetrics {
|
||||
schema: SchemaRef,
|
||||
}
|
||||
|
||||
const METRIC_NAME: &str = "metric_name";
|
||||
const METRIC_VALUE: &str = "value";
|
||||
const METRIC_LABELS: &str = "labels";
|
||||
const NODE: &str = "node";
|
||||
const NODE_TYPE: &str = "node_type";
|
||||
const TIMESTAMP: &str = "timestamp";
|
||||
|
||||
/// The `information_schema.runtime_metrics` virtual table.
|
||||
/// It exposes GreptimeDB runtime metrics to users via SQL.
|
||||
impl InformationSchemaMetrics {
|
||||
pub(super) fn new() -> Self {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
}
|
||||
}
|
||||
|
||||
fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(METRIC_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(METRIC_VALUE, ConcreteDataType::float64_datatype(), false),
|
||||
ColumnSchema::new(METRIC_LABELS, ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(NODE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(NODE_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(
|
||||
TIMESTAMP,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
),
|
||||
]))
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaMetricsBuilder {
|
||||
InformationSchemaMetricsBuilder::new(self.schema.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for InformationSchemaMetrics {
|
||||
fn table_id(&self) -> TableId {
|
||||
INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
RUNTIME_METRICS
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_metrics(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
struct InformationSchemaMetricsBuilder {
|
||||
schema: SchemaRef,
|
||||
|
||||
metric_names: StringVectorBuilder,
|
||||
metric_values: Float64VectorBuilder,
|
||||
metric_labels: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaMetricsBuilder {
|
||||
fn new(schema: SchemaRef) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
metric_names: StringVectorBuilder::with_capacity(42),
|
||||
metric_values: Float64VectorBuilder::with_capacity(42),
|
||||
metric_labels: StringVectorBuilder::with_capacity(42),
|
||||
}
|
||||
}
|
||||
|
||||
fn add_metric(&mut self, metric_name: &str, labels: String, metric_value: f64) {
|
||||
self.metric_names.push(Some(metric_name));
|
||||
self.metric_values.push(Some(metric_value));
|
||||
self.metric_labels.push(Some(&labels));
|
||||
}
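A sketch of what `make_metrics` below feeds into this method, where `builder` is an `InformationSchemaMetricsBuilder` and the metric name, labels, and value are hypothetical; the label pairs arrive already joined as `name=value, name=value`:

builder.add_metric(
    "greptime_catalog_kv_get_count",
    "instance=127.0.0.1:4000, node_type=frontend".to_string(),
    42.0,
);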
|
||||
|
||||
async fn make_metrics(&mut self, _request: Option<ScanRequest>) -> Result<RecordBatch> {
|
||||
let metric_families = prometheus::gather();
|
||||
|
||||
let write_request =
|
||||
common_telemetry::metric::convert_metric_to_write_request(metric_families, None, 0);
|
||||
|
||||
for ts in write_request.timeseries {
|
||||
// Safety: always has `__name__` label
|
||||
let metric_name = ts
|
||||
.labels
|
||||
.iter()
|
||||
.find_map(|label| {
|
||||
if label.name == "__name__" {
|
||||
Some(label.value.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
self.add_metric(
|
||||
&metric_name,
|
||||
ts.labels
|
||||
.into_iter()
|
||||
.filter_map(|label| {
|
||||
if label.name == "__name__" {
|
||||
None
|
||||
} else {
|
||||
Some(format!("{}={}", label.name, label.value))
|
||||
}
|
||||
})
|
||||
.join(", "),
|
||||
// Safety: always has a sample
|
||||
ts.samples[0].value,
|
||||
);
|
||||
}
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let rows_num = self.metric_names.len();
|
||||
let unknowns = Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from(vec!["unknown"])),
|
||||
rows_num,
|
||||
));
|
||||
let timestamps = Arc::new(ConstantVector::new(
|
||||
Arc::new(TimestampMillisecondVector::from_slice([
|
||||
current_time_millis(),
|
||||
])),
|
||||
rows_num,
|
||||
));
|
||||
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
Arc::new(self.metric_names.finish()),
|
||||
Arc::new(self.metric_values.finish()),
|
||||
Arc::new(self.metric_labels.finish()),
|
||||
// TODO(dennis): supports node and node_type for cluster
|
||||
unknowns.clone(),
|
||||
unknowns,
|
||||
timestamps,
|
||||
];
|
||||
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl DfPartitionStream for InformationSchemaMetrics {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_metrics(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_recordbatch::RecordBatches;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_make_metrics() {
|
||||
let metrics = InformationSchemaMetrics::new();
|
||||
|
||||
let stream = metrics.to_stream(ScanRequest::default()).unwrap();
|
||||
|
||||
let batches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
|
||||
let result_literal = batches.pretty_print().unwrap();
|
||||
|
||||
assert!(result_literal.contains(METRIC_NAME));
|
||||
assert!(result_literal.contains(METRIC_VALUE));
|
||||
assert!(result_literal.contains(METRIC_LABELS));
|
||||
assert!(result_literal.contains(NODE));
|
||||
assert!(result_literal.contains(NODE_TYPE));
|
||||
assert!(result_literal.contains(TIMESTAMP));
|
||||
}
|
||||
}
|
||||
222 src/catalog/src/information_schema/schemata.rs (new file)
@@ -0,0 +1,222 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_SCHEMATA_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::StringVectorBuilder;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
|
||||
use super::SCHEMATA;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::{InformationTable, Predicates};
|
||||
use crate::CatalogManager;
|
||||
|
||||
const CATALOG_NAME: &str = "catalog_name";
|
||||
const SCHEMA_NAME: &str = "schema_name";
|
||||
const DEFAULT_CHARACTER_SET_NAME: &str = "default_character_set_name";
|
||||
const DEFAULT_COLLATION_NAME: &str = "default_collation_name";
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
/// The `information_schema.schemata` table implementation.
|
||||
pub(super) struct InformationSchemaSchemata {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
}
|
||||
|
||||
impl InformationSchemaSchemata {
|
||||
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
|
||||
Self {
|
||||
schema: Self::schema(),
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(CATALOG_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(SCHEMA_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(
|
||||
DEFAULT_CHARACTER_SET_NAME,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
DEFAULT_COLLATION_NAME,
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new("sql_path", ConcreteDataType::string_datatype(), true),
|
||||
]))
|
||||
}
|
||||
|
||||
fn builder(&self) -> InformationSchemaSchemataBuilder {
|
||||
InformationSchemaSchemataBuilder::new(
|
||||
self.schema.clone(),
|
||||
self.catalog_name.clone(),
|
||||
self.catalog_manager.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl InformationTable for InformationSchemaSchemata {
|
||||
fn table_id(&self) -> TableId {
|
||||
INFORMATION_SCHEMA_SCHEMATA_TABLE_ID
|
||||
}
|
||||
|
||||
fn table_name(&self) -> &'static str {
|
||||
SCHEMATA
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_schemata(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
));
|
||||
Ok(Box::pin(
|
||||
RecordBatchStreamAdapter::try_new(stream)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the `information_schema.schemata` table row by row
|
||||
///
|
||||
/// Columns are based on <https://docs.pingcap.com/tidb/stable/information-schema-schemata>
|
||||
struct InformationSchemaSchemataBuilder {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
|
||||
catalog_names: StringVectorBuilder,
|
||||
schema_names: StringVectorBuilder,
|
||||
charset_names: StringVectorBuilder,
|
||||
collation_names: StringVectorBuilder,
|
||||
sql_paths: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaSchemataBuilder {
|
||||
fn new(
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
catalog_manager: Weak<dyn CatalogManager>,
|
||||
) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
catalog_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
charset_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
collation_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
sql_paths: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.schemata` virtual table
|
||||
async fn make_schemata(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
|
||||
let catalog_name = self.catalog_name.clone();
|
||||
let catalog_manager = self
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
self.add_schema(&predicates, &catalog_name, &schema_name);
|
||||
}
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
fn add_schema(&mut self, predicates: &Predicates, catalog_name: &str, schema_name: &str) {
|
||||
let row = [
|
||||
(CATALOG_NAME, &Value::from(catalog_name)),
|
||||
(SCHEMA_NAME, &Value::from(schema_name)),
|
||||
(DEFAULT_CHARACTER_SET_NAME, &Value::from("utf8")),
|
||||
(DEFAULT_COLLATION_NAME, &Value::from("utf8_bin")),
|
||||
];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.catalog_names.push(Some(catalog_name));
|
||||
self.schema_names.push(Some(schema_name));
|
||||
self.charset_names.push(Some("utf8"));
|
||||
self.collation_names.push(Some("utf8_bin"));
|
||||
self.sql_paths.push(None);
|
||||
}
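For example (a sketch; the catalog and schema names are placeholders), each schema that passes the filters becomes one row with the fixed charset and collation:

builder.add_schema(&predicates, "greptime", "public");
// => ("greptime", "public", "utf8", "utf8_bin", NULL) in the resulting record batch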
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
Arc::new(self.catalog_names.finish()),
|
||||
Arc::new(self.schema_names.finish()),
|
||||
Arc::new(self.charset_names.finish()),
|
||||
Arc::new(self.collation_names.finish()),
|
||||
Arc::new(self.sql_paths.finish()),
|
||||
];
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl DfPartitionStream for InformationSchemaSchemata {
|
||||
fn schema(&self) -> &ArrowSchemaRef {
|
||||
self.schema.arrow_schema()
|
||||
}
|
||||
|
||||
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_schemata(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
))
|
||||
}
|
||||
}
|
||||
43 src/catalog/src/information_schema/table_names.rs (new file)
@@ -0,0 +1,43 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/// All table names in `information_schema`.
|
||||
|
||||
pub const TABLES: &str = "tables";
|
||||
pub const COLUMNS: &str = "columns";
|
||||
pub const ENGINES: &str = "engines";
|
||||
pub const COLUMN_PRIVILEGES: &str = "column_privileges";
|
||||
pub const COLUMN_STATISTICS: &str = "column_statistics";
|
||||
pub const BUILD_INFO: &str = "build_info";
|
||||
pub const CHARACTER_SETS: &str = "character_sets";
|
||||
pub const COLLATIONS: &str = "collations";
|
||||
pub const COLLATION_CHARACTER_SET_APPLICABILITY: &str = "collation_character_set_applicability";
|
||||
pub const CHECK_CONSTRAINTS: &str = "check_constraints";
|
||||
pub const EVENTS: &str = "events";
|
||||
pub const FILES: &str = "files";
|
||||
pub const SCHEMATA: &str = "schemata";
|
||||
pub const KEY_COLUMN_USAGE: &str = "key_column_usage";
|
||||
pub const OPTIMIZER_TRACE: &str = "optimizer_trace";
|
||||
pub const PARAMETERS: &str = "parameters";
|
||||
pub const PROFILING: &str = "profiling";
|
||||
pub const REFERENTIAL_CONSTRAINTS: &str = "referential_constraints";
|
||||
pub const ROUTINES: &str = "routines";
|
||||
pub const SCHEMA_PRIVILEGES: &str = "schema_privileges";
|
||||
pub const TABLE_PRIVILEGES: &str = "table_privileges";
|
||||
pub const TRIGGERS: &str = "triggers";
|
||||
pub const GLOBAL_STATUS: &str = "global_status";
|
||||
pub const SESSION_STATUS: &str = "session_status";
|
||||
pub const RUNTIME_METRICS: &str = "runtime_metrics";
|
||||
pub const PARTITIONS: &str = "partitions";
|
||||
pub const REGION_PEERS: &str = "greptime_region_peers";
|
||||
@@ -15,10 +15,7 @@
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::{
|
||||
INFORMATION_SCHEMA_COLUMNS_TABLE_ID, INFORMATION_SCHEMA_NAME,
|
||||
INFORMATION_SCHEMA_TABLES_TABLE_ID,
|
||||
};
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_TABLES_TABLE_ID;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
@@ -28,18 +25,28 @@ use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder};
|
||||
use futures::TryStreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::TableId;
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
use table::metadata::TableType;
|
||||
|
||||
use super::{COLUMNS, TABLES};
|
||||
use super::TABLES;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::InformationTable;
|
||||
use crate::information_schema::{InformationTable, Predicates};
|
||||
use crate::CatalogManager;
|
||||
|
||||
const TABLE_CATALOG: &str = "table_catalog";
|
||||
const TABLE_SCHEMA: &str = "table_schema";
|
||||
const TABLE_NAME: &str = "table_name";
|
||||
const TABLE_TYPE: &str = "table_type";
|
||||
const TABLE_ID: &str = "table_id";
|
||||
const ENGINE: &str = "engine";
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
pub(super) struct InformationSchemaTables {
|
||||
schema: SchemaRef,
|
||||
catalog_name: String,
|
||||
@@ -57,12 +64,12 @@ impl InformationSchemaTables {
|
||||
|
||||
pub(crate) fn schema() -> SchemaRef {
|
||||
Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new("table_catalog", ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new("table_schema", ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new("table_name", ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new("table_type", ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new("table_id", ConcreteDataType::uint32_datatype(), true),
|
||||
ColumnSchema::new("engine", ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(TABLE_ID, ConcreteDataType::uint32_datatype(), true),
|
||||
ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
|
||||
]))
|
||||
}
|
||||
|
||||
@@ -88,14 +95,14 @@ impl InformationTable for InformationSchemaTables {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn to_stream(&self) -> Result<SendableRecordBatchStream> {
|
||||
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let schema = self.schema.arrow_schema().clone();
|
||||
let mut builder = self.builder();
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_tables()
|
||||
.make_tables(Some(request))
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
@@ -135,80 +142,48 @@ impl InformationSchemaTablesBuilder {
|
||||
schema,
|
||||
catalog_name,
|
||||
catalog_manager,
|
||||
catalog_names: StringVectorBuilder::with_capacity(42),
|
||||
schema_names: StringVectorBuilder::with_capacity(42),
|
||||
table_names: StringVectorBuilder::with_capacity(42),
|
||||
table_types: StringVectorBuilder::with_capacity(42),
|
||||
table_ids: UInt32VectorBuilder::with_capacity(42),
|
||||
engines: StringVectorBuilder::with_capacity(42),
|
||||
catalog_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_ids: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
engines: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct the `information_schema.tables` virtual table
|
||||
async fn make_tables(&mut self) -> Result<RecordBatch> {
|
||||
async fn make_tables(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
|
||||
let catalog_name = self.catalog_name.clone();
|
||||
let catalog_manager = self
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
|
||||
if !catalog_manager
|
||||
.schema_exists(&catalog_name, &schema_name)
|
||||
.await?
|
||||
{
|
||||
continue;
|
||||
}
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name).await;
|
||||
|
||||
for table_name in catalog_manager
|
||||
.table_names(&catalog_name, &schema_name)
|
||||
.await?
|
||||
{
|
||||
if let Some(table) = catalog_manager
|
||||
.table(&catalog_name, &schema_name, &table_name)
|
||||
.await?
|
||||
{
|
||||
let table_info = table.table_info();
|
||||
self.add_table(
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
table.table_type(),
|
||||
Some(table_info.ident.table_id),
|
||||
Some(&table_info.meta.engine),
|
||||
);
|
||||
} else {
|
||||
// TODO: this specific branch is only a workaround for FrontendCatalogManager.
|
||||
if schema_name == INFORMATION_SCHEMA_NAME {
|
||||
if table_name == COLUMNS {
|
||||
self.add_table(
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
TableType::Temporary,
|
||||
Some(INFORMATION_SCHEMA_COLUMNS_TABLE_ID),
|
||||
None,
|
||||
);
|
||||
} else if table_name == TABLES {
|
||||
self.add_table(
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
TableType::Temporary,
|
||||
Some(INFORMATION_SCHEMA_TABLES_TABLE_ID),
|
||||
None,
|
||||
);
|
||||
}
|
||||
}
|
||||
};
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let table_info = table.table_info();
|
||||
self.add_table(
|
||||
&predicates,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_info.name,
|
||||
table.table_type(),
|
||||
Some(table_info.ident.table_id),
|
||||
Some(&table_info.meta.engine),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
self.finish()
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn add_table(
|
||||
&mut self,
|
||||
predicates: &Predicates,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
table_name: &str,
|
||||
@@ -216,14 +191,27 @@ impl InformationSchemaTablesBuilder {
|
||||
table_id: Option<u32>,
|
||||
engine: Option<&str>,
|
||||
) {
|
||||
self.catalog_names.push(Some(catalog_name));
|
||||
self.schema_names.push(Some(schema_name));
|
||||
self.table_names.push(Some(table_name));
|
||||
self.table_types.push(Some(match table_type {
|
||||
let table_type = match table_type {
|
||||
TableType::Base => "BASE TABLE",
|
||||
TableType::View => "VIEW",
|
||||
TableType::Temporary => "LOCAL TEMPORARY",
|
||||
}));
|
||||
};
|
||||
|
||||
let row = [
|
||||
(TABLE_CATALOG, &Value::from(catalog_name)),
|
||||
(TABLE_SCHEMA, &Value::from(schema_name)),
|
||||
(TABLE_NAME, &Value::from(table_name)),
|
||||
(TABLE_TYPE, &Value::from(table_type)),
|
||||
];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.catalog_names.push(Some(catalog_name));
|
||||
self.schema_names.push(Some(schema_name));
|
||||
self.table_names.push(Some(table_name));
|
||||
self.table_types.push(Some(table_type));
|
||||
self.table_ids.push(table_id);
|
||||
self.engines.push(engine);
|
||||
}
|
||||
@@ -253,7 +241,7 @@ impl DfPartitionStream for InformationSchemaTables {
|
||||
schema,
|
||||
futures::stream::once(async move {
|
||||
builder
|
||||
.make_tables()
|
||||
.make_tables(None)
|
||||
.await
|
||||
.map(|x| x.into_df_record_batch())
|
||||
.map_err(Into::into)
|
||||
|
||||
@@ -12,11 +12,9 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub use client::{CachedMetaKvBackend, MetaKvBackend};
|
||||
pub use client::{CachedMetaKvBackend, CachedMetaKvBackendBuilder, MetaKvBackend};
|
||||
|
||||
mod client;
|
||||
mod manager;
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
pub mod mock;
|
||||
pub use manager::KvBackendCatalogManager;
|
||||
|
||||
@@ -14,8 +14,10 @@

use std::any::Any;
use std::fmt::Debug;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::time::Duration;
use std::usize;

use common_error::ext::BoxedError;
use common_meta::cache_invalidator::KvCacheInvalidator;
@@ -33,18 +35,91 @@ use meta_client::client::MetaClient;
use moka::future::{Cache, CacheBuilder};
use snafu::{OptionExt, ResultExt};

use crate::metrics::{METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET};
use crate::metrics::{
    METRIC_CATALOG_KV_BATCH_GET, METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET,
};

const CACHE_MAX_CAPACITY: u64 = 10000;
const CACHE_TTL_SECOND: u64 = 10 * 60;
const CACHE_TTI_SECOND: u64 = 5 * 60;
const DEFAULT_CACHE_MAX_CAPACITY: u64 = 10000;
const DEFAULT_CACHE_TTL: Duration = Duration::from_secs(10 * 60);
const DEFAULT_CACHE_TTI: Duration = Duration::from_secs(5 * 60);

pub struct CachedMetaKvBackendBuilder {
    cache_max_capacity: Option<u64>,
    cache_ttl: Option<Duration>,
    cache_tti: Option<Duration>,
    meta_client: Arc<MetaClient>,
}

impl CachedMetaKvBackendBuilder {
    pub fn new(meta_client: Arc<MetaClient>) -> Self {
        Self {
            cache_max_capacity: None,
            cache_ttl: None,
            cache_tti: None,
            meta_client,
        }
    }

    pub fn cache_max_capacity(mut self, cache_max_capacity: u64) -> Self {
        self.cache_max_capacity.replace(cache_max_capacity);
        self
    }

    pub fn cache_ttl(mut self, cache_ttl: Duration) -> Self {
        self.cache_ttl.replace(cache_ttl);
        self
    }

    pub fn cache_tti(mut self, cache_tti: Duration) -> Self {
        self.cache_tti.replace(cache_tti);
        self
    }

    pub fn build(self) -> CachedMetaKvBackend {
        let cache_max_capacity = self
            .cache_max_capacity
            .unwrap_or(DEFAULT_CACHE_MAX_CAPACITY);
        let cache_ttl = self.cache_ttl.unwrap_or(DEFAULT_CACHE_TTL);
        let cache_tti = self.cache_tti.unwrap_or(DEFAULT_CACHE_TTI);

        let cache = Arc::new(
            CacheBuilder::new(cache_max_capacity)
                .time_to_live(cache_ttl)
                .time_to_idle(cache_tti)
                .build(),
        );

        let kv_backend = Arc::new(MetaKvBackend {
            client: self.meta_client,
        });
        let name = format!("CachedKvBackend({})", kv_backend.name());
        let version = AtomicUsize::new(0);

        CachedMetaKvBackend {
            kv_backend,
            cache,
            name,
            version,
        }
    }
}
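
// Sketch (not part of the patch): building a cached backend with custom limits. The helper
// name and the concrete values are hypothetical; any option left unset falls back to the
// DEFAULT_CACHE_* constants above.
#[allow(dead_code)]
fn example_build_cached_backend(meta_client: Arc<MetaClient>) -> CachedMetaKvBackend {
    CachedMetaKvBackendBuilder::new(meta_client)
        .cache_max_capacity(4096)
        .cache_ttl(Duration::from_secs(10 * 60))
        .cache_tti(Duration::from_secs(5 * 60))
        .build()
}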

pub type CacheBackendRef = Arc<Cache<Vec<u8>, KeyValue>>;

/// A wrapper of `MetaKvBackend` with cache support.
///
/// `CachedMetaKvBackend` is mainly used to read metadata from Metasrv, and caches the results of
/// `get` and `batch_get`. One way its cache gets invalidated: when metadata changes, Metasrv
/// broadcasts a metadata invalidation request.
///
/// Therefore, it is recommended to use `CachedMetaKvBackend` only for reading metadata-related
/// information. Note: reading other information through it may return stale data, depending on
/// the cache's TTL and TTI.
pub struct CachedMetaKvBackend {
    kv_backend: KvBackendRef,
    cache: CacheBackendRef,
    name: String,
    version: AtomicUsize,
}
|
||||
|
||||
impl TxnService for CachedMetaKvBackend {
|
||||
@@ -96,7 +171,38 @@ impl KvBackend for CachedMetaKvBackend {
    }

    async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
        self.kv_backend.batch_get(req).await
        let _timer = METRIC_CATALOG_KV_BATCH_GET.start_timer();

        let mut kvs = Vec::with_capacity(req.keys.len());
        let mut miss_keys = Vec::with_capacity(req.keys.len());

        for key in req.keys {
            if let Some(val) = self.cache.get(&key).await {
                kvs.push(val);
            } else {
                miss_keys.push(key);
            }
        }

        let batch_get_req = BatchGetRequest::new().with_keys(miss_keys.clone());

        let pre_version = self.version();

        let unhit_kvs = self.kv_backend.batch_get(batch_get_req).await?.kvs;

        for kv in unhit_kvs.iter() {
            self.cache.insert(kv.key().to_vec(), kv.clone()).await;
        }

        if !self.validate_version(pre_version) {
            for key in miss_keys.iter() {
                self.cache.invalidate(key).await;
            }
        }

        kvs.extend(unhit_kvs);

        Ok(BatchGetResponse { kvs })
    }
|
||||
|
||||
async fn compare_and_put(&self, req: CompareAndPutRequest) -> Result<CompareAndPutResponse> {
|
||||
@@ -154,8 +260,14 @@ impl KvBackend for CachedMetaKvBackend {
|
||||
async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
|
||||
let _timer = METRIC_CATALOG_KV_GET.start_timer();
|
||||
|
||||
let pre_version = Arc::new(Mutex::new(None));
|
||||
|
||||
let init = async {
|
||||
let version_clone = pre_version.clone();
|
||||
let _timer = METRIC_CATALOG_KV_REMOTE_GET.start_timer();
|
||||
|
||||
version_clone.lock().unwrap().replace(self.version());
|
||||
|
||||
self.kv_backend.get(key).await.map(|val| {
|
||||
val.with_context(|| CacheNotGetSnafu {
|
||||
key: String::from_utf8_lossy(key),
|
||||
@@ -166,7 +278,7 @@ impl KvBackend for CachedMetaKvBackend {
|
||||
// currently moka doesn't have `optionally_try_get_with_by_ref`
|
||||
// TODO(fys): change to moka method when available
|
||||
// https://github.com/moka-rs/moka/issues/254
|
||||
match self.cache.try_get_with_by_ref(key, init).await {
|
||||
let ret = match self.cache.try_get_with_by_ref(key, init).await {
|
||||
Ok(val) => Ok(Some(val)),
|
||||
Err(e) => match e.as_ref() {
|
||||
CacheNotGet { .. } => Ok(None),
|
||||
@@ -175,29 +287,40 @@ impl KvBackend for CachedMetaKvBackend {
|
||||
}
|
||||
.map_err(|e| GetKvCache {
|
||||
err_msg: e.to_string(),
|
||||
})
|
||||
});
|
||||
|
||||
// "cache.invalidate_key" and "cache.try_get_with_by_ref" are not mutually exclusive. So we need
|
||||
// to use the version mechanism to prevent expired data from being put into the cache.
|
||||
if pre_version
|
||||
.lock()
|
||||
.unwrap()
|
||||
.as_ref()
|
||||
.map_or(false, |v| !self.validate_version(*v))
|
||||
{
|
||||
self.cache.invalidate(key).await;
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
}
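
// Illustration only (not part of the patch): the version mechanism used in `get` and
// `batch_get` above, in miniature. An invalidation bumps the counter; a reader snapshots
// it before the remote fetch and discards the freshly cached value if the counter moved,
// so a stale write cannot undo a concurrent invalidation.
#[allow(dead_code)]
mod version_check_sketch {
    use std::sync::atomic::{AtomicUsize, Ordering};

    pub struct Versioned {
        version: AtomicUsize,
    }

    impl Versioned {
        /// Snapshot the current version before fetching from the remote backend.
        pub fn snapshot(&self) -> usize {
            self.version.load(Ordering::Relaxed)
        }

        /// Called on invalidation; any in-flight read that started earlier now fails validation.
        pub fn invalidate(&self) {
            let _ = self.version.fetch_add(1, Ordering::Relaxed);
        }

        /// True if no invalidation happened since the snapshot was taken.
        pub fn still_valid(&self, snapshot: usize) -> bool {
            self.snapshot() == snapshot
        }
    }
}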
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl KvCacheInvalidator for CachedMetaKvBackend {
|
||||
async fn invalidate_key(&self, key: &[u8]) {
|
||||
self.create_new_version();
|
||||
self.cache.invalidate(key).await;
|
||||
debug!("invalidated cache key: {}", String::from_utf8_lossy(key));
|
||||
}
|
||||
}
|
||||
|
||||
impl CachedMetaKvBackend {
|
||||
pub fn new(client: Arc<MetaClient>) -> Self {
|
||||
let kv_backend = Arc::new(MetaKvBackend { client });
|
||||
Self::wrap(kv_backend)
|
||||
}
|
||||
|
||||
pub fn wrap(kv_backend: KvBackendRef) -> Self {
|
||||
// only for test
|
||||
#[cfg(test)]
|
||||
fn wrap(kv_backend: KvBackendRef) -> Self {
|
||||
let cache = Arc::new(
|
||||
CacheBuilder::new(CACHE_MAX_CAPACITY)
|
||||
.time_to_live(Duration::from_secs(CACHE_TTL_SECOND))
|
||||
.time_to_idle(Duration::from_secs(CACHE_TTI_SECOND))
|
||||
CacheBuilder::new(DEFAULT_CACHE_MAX_CAPACITY)
|
||||
.time_to_live(DEFAULT_CACHE_TTL)
|
||||
.time_to_idle(DEFAULT_CACHE_TTI)
|
||||
.build(),
|
||||
);
|
||||
|
||||
@@ -206,12 +329,25 @@ impl CachedMetaKvBackend {
|
||||
kv_backend,
|
||||
cache,
|
||||
name,
|
||||
version: AtomicUsize::new(0),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn cache(&self) -> &CacheBackendRef {
|
||||
&self.cache
|
||||
}
|
||||
|
||||
fn version(&self) -> usize {
|
||||
self.version.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
fn validate_version(&self, pre_version: usize) -> bool {
|
||||
self.version() == pre_version
|
||||
}
|
||||
|
||||
fn create_new_version(&self) -> usize {
|
||||
self.version.fetch_add(1, Ordering::Relaxed) + 1
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -308,3 +444,162 @@ impl KvBackend for MetaKvBackend {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::any::Any;
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_meta::kv_backend::{KvBackend, TxnService};
|
||||
use common_meta::rpc::store::{
|
||||
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse,
|
||||
BatchPutRequest, BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse,
|
||||
DeleteRangeRequest, DeleteRangeResponse, PutRequest, PutResponse, RangeRequest,
|
||||
RangeResponse,
|
||||
};
|
||||
use common_meta::rpc::KeyValue;
|
||||
use dashmap::DashMap;
|
||||
|
||||
use super::CachedMetaKvBackend;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct SimpleKvBackend {
|
||||
inner_map: DashMap<Vec<u8>, Vec<u8>>,
|
||||
get_execute_times: Arc<AtomicU32>,
|
||||
}
|
||||
|
||||
impl TxnService for SimpleKvBackend {
|
||||
type Error = common_meta::error::Error;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl KvBackend for SimpleKvBackend {
|
||||
fn name(&self) -> &str {
|
||||
"SimpleKvBackend"
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse, Self::Error> {
|
||||
let mut kvs = Vec::with_capacity(req.keys.len());
|
||||
for key in req.keys.iter() {
|
||||
if let Some(kv) = self.get(key).await? {
|
||||
kvs.push(kv);
|
||||
}
|
||||
}
|
||||
Ok(BatchGetResponse { kvs })
|
||||
}
|
||||
|
||||
async fn put(&self, req: PutRequest) -> Result<PutResponse, Self::Error> {
|
||||
self.inner_map.insert(req.key, req.value);
|
||||
// always return None as prev_kv, since we don't use it in this test.
|
||||
Ok(PutResponse { prev_kv: None })
|
||||
}
|
||||
|
||||
async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>, Self::Error> {
|
||||
self.get_execute_times
|
||||
.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
|
||||
Ok(self.inner_map.get(key).map(|v| KeyValue {
|
||||
key: key.to_vec(),
|
||||
value: v.value().clone(),
|
||||
}))
|
||||
}
|
||||
|
||||
async fn range(&self, _req: RangeRequest) -> Result<RangeResponse, Self::Error> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn batch_put(&self, _req: BatchPutRequest) -> Result<BatchPutResponse, Self::Error> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn compare_and_put(
|
||||
&self,
|
||||
_req: CompareAndPutRequest,
|
||||
) -> Result<CompareAndPutResponse, Self::Error> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn delete_range(
|
||||
&self,
|
||||
_req: DeleteRangeRequest,
|
||||
) -> Result<DeleteRangeResponse, Self::Error> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn batch_delete(
|
||||
&self,
|
||||
_req: BatchDeleteRequest,
|
||||
) -> Result<BatchDeleteResponse, Self::Error> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cached_kv_backend() {
|
||||
let simple_kv = Arc::new(SimpleKvBackend::default());
|
||||
let get_execute_times = simple_kv.get_execute_times.clone();
|
||||
let cached_kv = CachedMetaKvBackend::wrap(simple_kv);
|
||||
|
||||
add_some_vals(&cached_kv).await;
|
||||
|
||||
let batch_get_req = BatchGetRequest {
|
||||
keys: vec![b"k1".to_vec(), b"k2".to_vec()],
|
||||
};
|
||||
|
||||
assert_eq!(get_execute_times.load(Ordering::SeqCst), 0);
|
||||
|
||||
for _ in 0..10 {
|
||||
let _batch_get_resp = cached_kv.batch_get(batch_get_req.clone()).await.unwrap();
|
||||
|
||||
assert_eq!(get_execute_times.load(Ordering::SeqCst), 2);
|
||||
}
|
||||
|
||||
let batch_get_req = BatchGetRequest {
|
||||
keys: vec![b"k1".to_vec(), b"k2".to_vec(), b"k3".to_vec()],
|
||||
};
|
||||
|
||||
let _batch_get_resp = cached_kv.batch_get(batch_get_req.clone()).await.unwrap();
|
||||
|
||||
assert_eq!(get_execute_times.load(Ordering::SeqCst), 3);
|
||||
|
||||
for _ in 0..10 {
|
||||
let _batch_get_resp = cached_kv.batch_get(batch_get_req.clone()).await.unwrap();
|
||||
|
||||
assert_eq!(get_execute_times.load(Ordering::SeqCst), 3);
|
||||
}
|
||||
}
|
||||
|
||||
async fn add_some_vals(kv_backend: &impl KvBackend) {
|
||||
kv_backend
|
||||
.put(PutRequest {
|
||||
key: b"k1".to_vec(),
|
||||
value: b"v1".to_vec(),
|
||||
prev_kv: false,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
kv_backend
|
||||
.put(PutRequest {
|
||||
key: b"k2".to_vec(),
|
||||
value: b"v2".to_vec(),
|
||||
prev_kv: false,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
kv_backend
|
||||
.put(PutRequest {
|
||||
key: b"k3".to_vec(),
|
||||
value: b"v3".to_vec(),
|
||||
prev_kv: false,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,18 +16,21 @@ use std::any::Any;
|
||||
use std::collections::BTreeSet;
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use async_stream::try_stream;
|
||||
use common_catalog::consts::{DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cache_invalidator::{CacheInvalidator, CacheInvalidatorRef, Context};
|
||||
use common_meta::datanode_manager::DatanodeManagerRef;
|
||||
use common_meta::error::Result as MetaResult;
|
||||
use common_meta::key::catalog_name::CatalogNameKey;
|
||||
use common_meta::key::schema_name::SchemaNameKey;
|
||||
use common_meta::key::table_info::TableInfoValue;
|
||||
use common_meta::key::table_name::TableNameKey;
|
||||
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::table_name::TableName;
|
||||
use futures_util::TryStreamExt;
|
||||
use futures_util::stream::BoxStream;
|
||||
use futures_util::{StreamExt, TryStreamExt};
|
||||
use moka::sync::Cache;
|
||||
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
|
||||
use snafu::prelude::*;
|
||||
use table::dist_table::DistTable;
|
||||
@@ -36,10 +39,10 @@ use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
|
||||
use table::TableRef;
|
||||
|
||||
use crate::error::{
|
||||
self as catalog_err, ListCatalogsSnafu, ListSchemasSnafu, Result as CatalogResult,
|
||||
TableMetadataManagerSnafu,
|
||||
self as catalog_err, ListCatalogsSnafu, ListSchemasSnafu, ListTablesSnafu,
|
||||
Result as CatalogResult, TableMetadataManagerSnafu,
|
||||
};
|
||||
use crate::information_schema::{InformationSchemaProvider, COLUMNS, TABLES};
|
||||
use crate::information_schema::InformationSchemaProvider;
|
||||
use crate::CatalogManager;
|
||||
|
||||
/// Access all existing catalog, schema and tables.
|
||||
@@ -55,39 +58,49 @@ pub struct KvBackendCatalogManager {
|
||||
cache_invalidator: CacheInvalidatorRef,
|
||||
partition_manager: PartitionRuleManagerRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
datanode_manager: DatanodeManagerRef,
|
||||
/// A sub-CatalogManager that handles system tables
|
||||
system_catalog: SystemCatalog,
|
||||
}
|
||||
|
||||
fn make_table(table_info_value: TableInfoValue) -> CatalogResult<TableRef> {
|
||||
let table_info = table_info_value
|
||||
.table_info
|
||||
.try_into()
|
||||
.context(catalog_err::InvalidTableInfoInCatalogSnafu)?;
|
||||
Ok(DistTable::table(Arc::new(table_info)))
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl CacheInvalidator for KvBackendCatalogManager {
|
||||
async fn invalidate_table_name(&self, ctx: &Context, table_name: TableName) -> MetaResult<()> {
|
||||
self.cache_invalidator
|
||||
.invalidate_table_name(ctx, table_name)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn invalidate_table_id(&self, ctx: &Context, table_id: TableId) -> MetaResult<()> {
|
||||
self.cache_invalidator
|
||||
.invalidate_table_id(ctx, table_id)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn invalidate_table_name(&self, ctx: &Context, table_name: TableName) -> MetaResult<()> {
|
||||
self.cache_invalidator
|
||||
.invalidate_table_name(ctx, table_name)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
const DEFAULT_CACHED_CATALOG: u64 = 128;
|
||||
|
||||
impl KvBackendCatalogManager {
|
||||
pub fn new(
|
||||
backend: KvBackendRef,
|
||||
cache_invalidator: CacheInvalidatorRef,
|
||||
datanode_manager: DatanodeManagerRef,
|
||||
) -> Arc<Self> {
|
||||
pub fn new(backend: KvBackendRef, cache_invalidator: CacheInvalidatorRef) -> Arc<Self> {
|
||||
Arc::new_cyclic(|me| Self {
|
||||
partition_manager: Arc::new(PartitionRuleManager::new(backend.clone())),
|
||||
table_metadata_manager: Arc::new(TableMetadataManager::new(backend)),
|
||||
cache_invalidator,
|
||||
datanode_manager,
|
||||
system_catalog: SystemCatalog {
|
||||
catalog_manager: me.clone(),
|
||||
catalog_cache: Cache::new(DEFAULT_CACHED_CATALOG),
|
||||
information_schema_provider: Arc::new(InformationSchemaProvider::new(
|
||||
// The catalog name is not used in system_catalog, so leave it empty.
|
||||
String::default(),
|
||||
me.clone(),
|
||||
)),
|
||||
},
|
||||
})
|
||||
}
|
||||
@@ -99,14 +112,14 @@ impl KvBackendCatalogManager {
|
||||
pub fn table_metadata_manager_ref(&self) -> &TableMetadataManagerRef {
|
||||
&self.table_metadata_manager
|
||||
}
|
||||
|
||||
pub fn datanode_manager(&self) -> DatanodeManagerRef {
|
||||
self.datanode_manager.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl CatalogManager for KvBackendCatalogManager {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
async fn catalog_names(&self) -> CatalogResult<Vec<String>> {
|
||||
let stream = self
|
||||
.table_metadata_manager
|
||||
@@ -133,28 +146,30 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
.try_collect::<BTreeSet<_>>()
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ListSchemasSnafu { catalog })?
|
||||
.into_iter()
|
||||
.collect::<Vec<_>>();
|
||||
.context(ListSchemasSnafu { catalog })?;
|
||||
|
||||
keys.extend_from_slice(&self.system_catalog.schema_names());
|
||||
keys.extend(self.system_catalog.schema_names());
|
||||
|
||||
Ok(keys)
|
||||
Ok(keys.into_iter().collect())
|
||||
}
|
||||
|
||||
async fn table_names(&self, catalog: &str, schema: &str) -> CatalogResult<Vec<String>> {
|
||||
let mut tables = self
|
||||
let stream = self
|
||||
.table_metadata_manager
|
||||
.table_name_manager()
|
||||
.tables(catalog, schema)
|
||||
.await;
|
||||
let mut tables = stream
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.map_err(BoxedError::new)
|
||||
.context(ListTablesSnafu { catalog, schema })?
|
||||
.into_iter()
|
||||
.map(|(k, _)| k)
|
||||
.collect::<Vec<String>>();
|
||||
.collect::<Vec<_>>();
|
||||
tables.extend_from_slice(&self.system_catalog.table_names(schema));
|
||||
|
||||
Ok(tables)
|
||||
Ok(tables.into_iter().collect())
|
||||
}
|
||||
|
||||
async fn catalog_exists(&self, catalog: &str) -> CatalogResult<bool> {
|
||||
@@ -223,17 +238,56 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
let table_info = Arc::new(
|
||||
table_info_value
|
||||
.table_info
|
||||
.try_into()
|
||||
.context(catalog_err::InvalidTableInfoInCatalogSnafu)?,
|
||||
);
|
||||
Ok(Some(DistTable::table(table_info)))
|
||||
make_table(table_info_value).map(Some)
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
async fn tables<'a>(
|
||||
&'a self,
|
||||
catalog: &'a str,
|
||||
schema: &'a str,
|
||||
) -> BoxStream<'a, CatalogResult<TableRef>> {
|
||||
let sys_tables = try_stream!({
|
||||
// System tables
|
||||
let sys_table_names = self.system_catalog.table_names(schema);
|
||||
for table_name in sys_table_names {
|
||||
if let Some(table) = self.system_catalog.table(catalog, schema, &table_name) {
|
||||
yield table;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let table_id_stream = self
|
||||
.table_metadata_manager
|
||||
.table_name_manager()
|
||||
.tables(catalog, schema)
|
||||
.await
|
||||
.map_ok(|(_, v)| v.table_id());
|
||||
const BATCH_SIZE: usize = 128;
|
||||
let user_tables = try_stream!({
|
||||
// Split table ids into chunks
|
||||
let mut table_id_chunks = table_id_stream.ready_chunks(BATCH_SIZE);
|
||||
|
||||
while let Some(table_ids) = table_id_chunks.next().await {
|
||||
let table_ids = table_ids
|
||||
.into_iter()
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.map_err(BoxedError::new)
|
||||
.context(ListTablesSnafu { catalog, schema })?;
|
||||
|
||||
let table_info_values = self
|
||||
.table_metadata_manager
|
||||
.table_info_manager()
|
||||
.batch_get(&table_ids)
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
for table_info_value in table_info_values.into_values() {
|
||||
yield make_table(table_info_value)?;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Box::pin(sys_tables.chain(user_tables))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -242,11 +296,12 @@ impl CatalogManager for KvBackendCatalogManager {
|
||||
// a new catalog is created.
|
||||
/// Existing system tables:
|
||||
/// - public.numbers
|
||||
/// - information_schema.tables
|
||||
/// - information_schema.columns
|
||||
/// - information_schema.{tables}
|
||||
#[derive(Clone)]
|
||||
struct SystemCatalog {
|
||||
catalog_manager: Weak<KvBackendCatalogManager>,
|
||||
catalog_cache: Cache<String, Arc<InformationSchemaProvider>>,
|
||||
information_schema_provider: Arc<InformationSchemaProvider>,
|
||||
}
|
||||
|
||||
impl SystemCatalog {
|
||||
@@ -256,7 +311,7 @@ impl SystemCatalog {
|
||||
|
||||
fn table_names(&self, schema: &str) -> Vec<String> {
|
||||
if schema == INFORMATION_SCHEMA_NAME {
|
||||
vec![TABLES.to_string(), COLUMNS.to_string()]
|
||||
self.information_schema_provider.table_names()
|
||||
} else if schema == DEFAULT_SCHEMA_NAME {
|
||||
vec![NUMBERS_TABLE_NAME.to_string()]
|
||||
} else {
|
||||
@@ -270,7 +325,7 @@ impl SystemCatalog {
|
||||
|
||||
fn table_exist(&self, schema: &str, table: &str) -> bool {
|
||||
if schema == INFORMATION_SCHEMA_NAME {
|
||||
table == TABLES || table == COLUMNS
|
||||
self.information_schema_provider.table(table).is_some()
|
||||
} else if schema == DEFAULT_SCHEMA_NAME {
|
||||
table == NUMBERS_TABLE_NAME
|
||||
} else {
|
||||
@@ -281,7 +336,12 @@ impl SystemCatalog {
|
||||
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Option<TableRef> {
|
||||
if schema == INFORMATION_SCHEMA_NAME {
|
||||
let information_schema_provider =
|
||||
InformationSchemaProvider::new(catalog.to_string(), self.catalog_manager.clone());
|
||||
self.catalog_cache.get_with_by_ref(catalog, move || {
|
||||
Arc::new(InformationSchemaProvider::new(
|
||||
catalog.to_string(),
|
||||
self.catalog_manager.clone(),
|
||||
))
|
||||
});
|
||||
information_schema_provider.table(table_name)
|
||||
} else if schema == DEFAULT_SCHEMA_NAME && table_name == NUMBERS_TABLE_NAME {
|
||||
Some(NumbersTable::table(NUMBERS_TABLE_ID))
|
||||
|
||||
@@ -1,128 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock as StdRwLock};
|
||||
|
||||
use common_recordbatch::RecordBatch;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::vectors::StringVector;
|
||||
use table::engine::{CloseTableResult, EngineContext, TableEngine};
|
||||
use table::metadata::TableId;
|
||||
use table::requests::{
|
||||
AlterTableRequest, CloseTableRequest, CreateTableRequest, DropTableRequest, OpenTableRequest,
|
||||
TruncateTableRequest,
|
||||
};
|
||||
use table::test_util::MemTable;
|
||||
use table::TableRef;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct MockTableEngine {
|
||||
tables: StdRwLock<HashMap<TableId, TableRef>>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl TableEngine for MockTableEngine {
|
||||
fn name(&self) -> &str {
|
||||
"MockTableEngine"
|
||||
}
|
||||
|
||||
/// Create a table with only one column
|
||||
async fn create_table(
|
||||
&self,
|
||||
_ctx: &EngineContext,
|
||||
request: CreateTableRequest,
|
||||
) -> table::Result<TableRef> {
|
||||
let table_id = request.id;
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
|
||||
"name",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
)]));
|
||||
|
||||
let data = vec![Arc::new(StringVector::from(vec!["a", "b", "c"])) as _];
|
||||
let record_batch = RecordBatch::new(schema, data).unwrap();
|
||||
let table = MemTable::new_with_catalog(
|
||||
&request.table_name,
|
||||
record_batch,
|
||||
table_id,
|
||||
request.catalog_name,
|
||||
request.schema_name,
|
||||
vec![0],
|
||||
);
|
||||
|
||||
let mut tables = self.tables.write().unwrap();
|
||||
let _ = tables.insert(table_id, table.clone() as TableRef);
|
||||
Ok(table)
|
||||
}
|
||||
|
||||
async fn open_table(
|
||||
&self,
|
||||
_ctx: &EngineContext,
|
||||
request: OpenTableRequest,
|
||||
) -> table::Result<Option<TableRef>> {
|
||||
Ok(self.tables.read().unwrap().get(&request.table_id).cloned())
|
||||
}
|
||||
|
||||
async fn alter_table(
|
||||
&self,
|
||||
_ctx: &EngineContext,
|
||||
_request: AlterTableRequest,
|
||||
) -> table::Result<TableRef> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn get_table(
|
||||
&self,
|
||||
_ctx: &EngineContext,
|
||||
table_id: TableId,
|
||||
) -> table::Result<Option<TableRef>> {
|
||||
Ok(self.tables.read().unwrap().get(&table_id).cloned())
|
||||
}
|
||||
|
||||
fn table_exists(&self, _ctx: &EngineContext, table_id: TableId) -> bool {
|
||||
self.tables.read().unwrap().contains_key(&table_id)
|
||||
}
|
||||
|
||||
async fn drop_table(
|
||||
&self,
|
||||
_ctx: &EngineContext,
|
||||
_request: DropTableRequest,
|
||||
) -> table::Result<bool> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn close_table(
|
||||
&self,
|
||||
_ctx: &EngineContext,
|
||||
request: CloseTableRequest,
|
||||
) -> table::Result<CloseTableResult> {
|
||||
let _ = self.tables.write().unwrap().remove(&request.table_id);
|
||||
Ok(CloseTableResult::Released(vec![]))
|
||||
}
|
||||
|
||||
async fn close(&self) -> table::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn truncate_table(
|
||||
&self,
|
||||
_ctx: &EngineContext,
|
||||
_request: TruncateTableRequest,
|
||||
) -> table::Result<bool> {
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
@@ -12,7 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(trait_upcasting)]
|
||||
#![feature(assert_matches)]
|
||||
#![feature(try_blocks)]
|
||||
|
||||
@@ -21,6 +20,7 @@ use std::fmt::{Debug, Formatter};
|
||||
use std::sync::Arc;
|
||||
|
||||
use futures::future::BoxFuture;
|
||||
use futures_util::stream::BoxStream;
|
||||
use table::metadata::TableId;
|
||||
use table::requests::CreateTableRequest;
|
||||
use table::TableRef;
|
||||
@@ -57,6 +57,13 @@ pub trait CatalogManager: Send + Sync {
        schema: &str,
        table_name: &str,
    ) -> Result<Option<TableRef>>;

    /// Returns a stream of all tables under the given catalog and schema.
    async fn tables<'a>(
        &'a self,
        catalog: &'a str,
        schema: &'a str,
    ) -> BoxStream<'a, Result<TableRef>>;
}

pub type CatalogManagerRef = Arc<dyn CatalogManager>;
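
// Sketch (not part of the patch) of how a caller can consume the new streaming API; the
// helper name is hypothetical, and "greptime"/"public" are just the default catalog/schema.
// The in-tree tests drain the stream the same way via `TryStreamExt`.
#[allow(dead_code)]
async fn example_collect_tables(manager: &dyn CatalogManager) -> Result<Vec<TableRef>> {
    use futures_util::TryStreamExt;

    manager
        .tables("greptime", "public")
        .await
        .try_collect::<Vec<_>>()
        .await
}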
|
||||
|
||||
@@ -17,8 +17,12 @@ use std::collections::hash_map::Entry;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock, Weak};
|
||||
|
||||
use async_stream::{stream, try_stream};
|
||||
use common_catalog::build_db_string;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME};
|
||||
use common_catalog::consts::{
|
||||
DEFAULT_CATALOG_NAME, DEFAULT_PRIVATE_SCHEMA_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME,
|
||||
};
|
||||
use futures_util::stream::BoxStream;
|
||||
use snafu::OptionExt;
|
||||
use table::TableRef;
|
||||
|
||||
@@ -37,10 +41,64 @@ pub struct MemoryCatalogManager {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl CatalogManager for MemoryCatalogManager {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
async fn catalog_names(&self) -> Result<Vec<String>> {
|
||||
Ok(self.catalogs.read().unwrap().keys().cloned().collect())
|
||||
}
|
||||
|
||||
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>> {
|
||||
Ok(self
|
||||
.catalogs
|
||||
.read()
|
||||
.unwrap()
|
||||
.get(catalog)
|
||||
.with_context(|| CatalogNotFoundSnafu {
|
||||
catalog_name: catalog,
|
||||
})?
|
||||
.keys()
|
||||
.cloned()
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>> {
|
||||
Ok(self
|
||||
.catalogs
|
||||
.read()
|
||||
.unwrap()
|
||||
.get(catalog)
|
||||
.with_context(|| CatalogNotFoundSnafu {
|
||||
catalog_name: catalog,
|
||||
})?
|
||||
.get(schema)
|
||||
.with_context(|| SchemaNotFoundSnafu { catalog, schema })?
|
||||
.keys()
|
||||
.cloned()
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn catalog_exists(&self, catalog: &str) -> Result<bool> {
|
||||
self.catalog_exist_sync(catalog)
|
||||
}
|
||||
|
||||
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool> {
|
||||
self.schema_exist_sync(catalog, schema)
|
||||
}
|
||||
|
||||
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
|
||||
let catalogs = self.catalogs.read().unwrap();
|
||||
Ok(catalogs
|
||||
.get(catalog)
|
||||
.with_context(|| CatalogNotFoundSnafu {
|
||||
catalog_name: catalog,
|
||||
})?
|
||||
.get(schema)
|
||||
.with_context(|| SchemaNotFoundSnafu { catalog, schema })?
|
||||
.contains_key(table))
|
||||
}
|
||||
|
||||
async fn table(
|
||||
&self,
|
||||
catalog: &str,
|
||||
@@ -59,57 +117,35 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn catalog_exists(&self, catalog: &str) -> Result<bool> {
|
||||
self.catalog_exist_sync(catalog)
|
||||
}
|
||||
|
||||
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
|
||||
async fn tables<'a>(
|
||||
&'a self,
|
||||
catalog: &'a str,
|
||||
schema: &'a str,
|
||||
) -> BoxStream<'a, Result<TableRef>> {
|
||||
let catalogs = self.catalogs.read().unwrap();
|
||||
Ok(catalogs
|
||||
.get(catalog)
|
||||
.with_context(|| CatalogNotFoundSnafu {
|
||||
catalog_name: catalog,
|
||||
})?
|
||||
.get(schema)
|
||||
.with_context(|| SchemaNotFoundSnafu { catalog, schema })?
|
||||
.contains_key(table))
|
||||
}
|
||||
|
||||
async fn catalog_names(&self) -> Result<Vec<String>> {
|
||||
Ok(self.catalogs.read().unwrap().keys().cloned().collect())
|
||||
}
|
||||
let Some(schemas) = catalogs.get(catalog) else {
|
||||
return Box::pin(stream!({
|
||||
yield CatalogNotFoundSnafu {
|
||||
catalog_name: catalog,
|
||||
}
|
||||
.fail();
|
||||
}));
|
||||
};
|
||||
|
||||
async fn schema_names(&self, catalog_name: &str) -> Result<Vec<String>> {
|
||||
Ok(self
|
||||
.catalogs
|
||||
.read()
|
||||
.unwrap()
|
||||
.get(catalog_name)
|
||||
.with_context(|| CatalogNotFoundSnafu { catalog_name })?
|
||||
.keys()
|
||||
.cloned()
|
||||
.collect())
|
||||
}
|
||||
let Some(tables) = schemas.get(schema) else {
|
||||
return Box::pin(stream!({
|
||||
yield SchemaNotFoundSnafu { catalog, schema }.fail();
|
||||
}));
|
||||
};
|
||||
|
||||
async fn table_names(&self, catalog_name: &str, schema_name: &str) -> Result<Vec<String>> {
|
||||
Ok(self
|
||||
.catalogs
|
||||
.read()
|
||||
.unwrap()
|
||||
.get(catalog_name)
|
||||
.with_context(|| CatalogNotFoundSnafu { catalog_name })?
|
||||
.get(schema_name)
|
||||
.with_context(|| SchemaNotFoundSnafu {
|
||||
catalog: catalog_name,
|
||||
schema: schema_name,
|
||||
})?
|
||||
.keys()
|
||||
.cloned()
|
||||
.collect())
|
||||
}
|
||||
let tables = tables.values().cloned().collect::<Vec<_>>();
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
return Box::pin(try_stream!({
|
||||
for table in tables {
|
||||
yield table;
|
||||
}
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -135,6 +171,18 @@ impl MemoryCatalogManager {
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
})
|
||||
.unwrap();
|
||||
manager
|
||||
.register_schema_sync(RegisterSchemaRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_PRIVATE_SCHEMA_NAME.to_string(),
|
||||
})
|
||||
.unwrap();
|
||||
manager
|
||||
.register_schema_sync(RegisterSchemaRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: INFORMATION_SCHEMA_NAME.to_string(),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
manager
|
||||
}
|
||||
@@ -243,10 +291,12 @@ impl MemoryCatalogManager {
|
||||
}
|
||||
|
||||
fn create_catalog_entry(self: &Arc<Self>, catalog: String) -> SchemaEntries {
|
||||
let information_schema = InformationSchemaProvider::build(
|
||||
let information_schema_provider = InformationSchemaProvider::new(
|
||||
catalog,
|
||||
Arc::downgrade(self) as Weak<dyn CatalogManager>,
|
||||
);
|
||||
let information_schema = information_schema_provider.tables().clone();
|
||||
|
||||
let mut catalog = HashMap::new();
|
||||
catalog.insert(INFORMATION_SCHEMA_NAME.to_string(), information_schema);
|
||||
catalog
|
||||
@@ -291,6 +341,7 @@ pub fn new_memory_catalog_manager() -> Result<Arc<MemoryCatalogManager>> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_catalog::consts::*;
|
||||
use futures_util::TryStreamExt;
|
||||
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
|
||||
|
||||
use super::*;
|
||||
@@ -315,8 +366,18 @@ mod tests {
|
||||
NUMBERS_TABLE_NAME,
|
||||
)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let _ = table.unwrap();
|
||||
let stream = catalog_list
|
||||
.tables(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
|
||||
.await;
|
||||
let tables = stream.try_collect::<Vec<_>>().await.unwrap();
|
||||
assert_eq!(tables.len(), 1);
|
||||
assert_eq!(
|
||||
table.table_info().table_id(),
|
||||
tables[0].table_info().table_id()
|
||||
);
|
||||
|
||||
assert!(catalog_list
|
||||
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "not_exists")
|
||||
.await
|
||||
|
||||
@@ -19,17 +19,19 @@ use prometheus::*;
|
||||
|
||||
lazy_static! {
|
||||
pub static ref METRIC_CATALOG_MANAGER_CATALOG_COUNT: IntGauge =
|
||||
register_int_gauge!("catalog_catalog_count", "catalog catalog count").unwrap();
|
||||
register_int_gauge!("greptime_catalog_catalog_count", "catalog catalog count").unwrap();
|
||||
pub static ref METRIC_CATALOG_MANAGER_SCHEMA_COUNT: IntGauge =
|
||||
register_int_gauge!("catalog_schema_count", "catalog schema count").unwrap();
|
||||
register_int_gauge!("greptime_catalog_schema_count", "catalog schema count").unwrap();
|
||||
pub static ref METRIC_CATALOG_MANAGER_TABLE_COUNT: IntGaugeVec = register_int_gauge_vec!(
|
||||
"catalog_table_count",
|
||||
"greptime_catalog_table_count",
|
||||
"catalog table count",
|
||||
&[METRIC_DB_LABEL]
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_CATALOG_KV_REMOTE_GET: Histogram =
|
||||
register_histogram!("catalog_kv_get_remote", "catalog kv get remote").unwrap();
|
||||
register_histogram!("greptime_catalog_kv_get_remote", "catalog kv get remote").unwrap();
|
||||
pub static ref METRIC_CATALOG_KV_GET: Histogram =
|
||||
register_histogram!("catalog_kv_get", "catalog kv get").unwrap();
|
||||
register_histogram!("greptime_catalog_kv_get", "catalog kv get").unwrap();
|
||||
pub static ref METRIC_CATALOG_KV_BATCH_GET: Histogram =
|
||||
register_histogram!("greptime_catalog_kv_batch_get", "catalog kv batch get").unwrap();
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_NAME;
|
||||
use common_catalog::format_full_table_name;
|
||||
use datafusion::common::{ResolvedTableReference, TableReference};
|
||||
use datafusion::datasource::provider_as_source;
|
||||
@@ -30,7 +29,7 @@ use crate::CatalogManagerRef;
|
||||
pub struct DfTableSourceProvider {
|
||||
catalog_manager: CatalogManagerRef,
|
||||
resolved_tables: HashMap<String, Arc<dyn TableSource>>,
|
||||
disallow_cross_schema_query: bool,
|
||||
disallow_cross_catalog_query: bool,
|
||||
default_catalog: String,
|
||||
default_schema: String,
|
||||
}
|
||||
@@ -38,12 +37,12 @@ pub struct DfTableSourceProvider {
|
||||
impl DfTableSourceProvider {
|
||||
pub fn new(
|
||||
catalog_manager: CatalogManagerRef,
|
||||
disallow_cross_schema_query: bool,
|
||||
disallow_cross_catalog_query: bool,
|
||||
query_ctx: &QueryContext,
|
||||
) -> Self {
|
||||
Self {
|
||||
catalog_manager,
|
||||
disallow_cross_schema_query,
|
||||
disallow_cross_catalog_query,
|
||||
resolved_tables: HashMap::new(),
|
||||
default_catalog: query_ctx.current_catalog().to_owned(),
|
||||
default_schema: query_ctx.current_schema().to_owned(),
|
||||
@@ -54,29 +53,18 @@ impl DfTableSourceProvider {
|
||||
&'a self,
|
||||
table_ref: TableReference<'a>,
|
||||
) -> Result<ResolvedTableReference<'a>> {
|
||||
if self.disallow_cross_schema_query {
|
||||
if self.disallow_cross_catalog_query {
|
||||
match &table_ref {
|
||||
TableReference::Bare { .. } => (),
|
||||
TableReference::Partial { schema, .. } => {
|
||||
ensure!(
|
||||
schema.as_ref() == self.default_schema
|
||||
|| schema.as_ref() == INFORMATION_SCHEMA_NAME,
|
||||
QueryAccessDeniedSnafu {
|
||||
catalog: &self.default_catalog,
|
||||
schema: schema.as_ref(),
|
||||
}
|
||||
);
|
||||
}
|
||||
TableReference::Partial { .. } => {}
|
||||
TableReference::Full {
|
||||
catalog, schema, ..
|
||||
} => {
|
||||
ensure!(
|
||||
catalog.as_ref() == self.default_catalog
|
||||
&& (schema.as_ref() == self.default_schema
|
||||
|| schema.as_ref() == INFORMATION_SCHEMA_NAME),
|
||||
catalog.as_ref() == self.default_catalog,
|
||||
QueryAccessDeniedSnafu {
|
||||
catalog: catalog.as_ref(),
|
||||
schema: schema.as_ref()
|
||||
schema: schema.as_ref(),
|
||||
}
|
||||
);
|
||||
}
|
||||
@@ -136,21 +124,21 @@ mod tests {
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
let _ = result.unwrap();
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Partial {
|
||||
schema: Cow::Borrowed("public"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
let _ = result.unwrap();
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Partial {
|
||||
schema: Cow::Borrowed("wrong_schema"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_err());
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("greptime"),
|
||||
@@ -158,7 +146,7 @@ mod tests {
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
let _ = result.unwrap();
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("wrong_catalog"),
|
||||
@@ -172,14 +160,15 @@ mod tests {
|
||||
schema: Cow::Borrowed("information_schema"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
let _ = table_provider.resolve_table_ref(table_ref).unwrap();
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("greptime"),
|
||||
schema: Cow::Borrowed("information_schema"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
let _ = table_provider.resolve_table_ref(table_ref).unwrap();
|
||||
assert!(table_provider.resolve_table_ref(table_ref).is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("dummy"),
|
||||
@@ -187,5 +176,12 @@ mod tests {
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
assert!(table_provider.resolve_table_ref(table_ref).is_err());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("greptime"),
|
||||
schema: Cow::Borrowed("greptime_private"),
|
||||
table: Cow::Borrowed("columns"),
|
||||
};
|
||||
assert!(table_provider.resolve_table_ref(table_ref).is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ testing = []
|
||||
|
||||
[dependencies]
|
||||
api.workspace = true
|
||||
arc-swap = "1.6"
|
||||
arrow-flight.workspace = true
|
||||
async-stream.workspace = true
|
||||
async-trait.workspace = true
|
||||
@@ -35,8 +36,8 @@ prost.workspace = true
|
||||
rand.workspace = true
|
||||
session.workspace = true
|
||||
snafu.workspace = true
|
||||
tokio-stream = { version = "0.1", features = ["net"] }
|
||||
tokio.workspace = true
|
||||
tokio-stream = { workspace = true, features = ["net"] }
|
||||
tonic.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
@@ -37,7 +37,7 @@ async fn run() {
|
||||
catalog_name: "greptime".to_string(),
|
||||
schema_name: "public".to_string(),
|
||||
table_name: "test_logical_dist_exec".to_string(),
|
||||
desc: "".to_string(),
|
||||
desc: String::default(),
|
||||
column_defs: vec![
|
||||
ColumnDef {
|
||||
name: "timestamp".to_string(),
|
||||
@@ -46,6 +46,7 @@ async fn run() {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Timestamp as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "key".to_string(),
|
||||
@@ -54,6 +55,7 @@ async fn run() {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Tag as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDef {
|
||||
name: "value".to_string(),
|
||||
@@ -62,6 +64,7 @@ async fn run() {
|
||||
default_constraint: vec![],
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
comment: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
],
|
||||
time_index: "timestamp".to_string(),
|
||||
@@ -78,7 +81,7 @@ async fn run() {
|
||||
|
||||
let logical = mock_logical_plan();
|
||||
event!(Level::INFO, "plan size: {:#?}", logical.len());
|
||||
let result = db.logical_plan(logical, 0).await.unwrap();
|
||||
let result = db.logical_plan(logical).await.unwrap();
|
||||
|
||||
event!(Level::INFO, "result: {:#?}", result);
|
||||
}
|
||||
|
||||
@@ -122,7 +122,7 @@ impl Client {
|
||||
self.inner.set_peers(urls);
|
||||
}
|
||||
|
||||
fn find_channel(&self) -> Result<(String, Channel)> {
|
||||
pub fn find_channel(&self) -> Result<(String, Channel)> {
|
||||
let addr = self
|
||||
.inner
|
||||
.get_peer()
|
||||
|
||||
@@ -27,8 +27,9 @@ use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_grpc::flight::{FlightDecoder, FlightMessage};
|
||||
use common_query::Output;
|
||||
use common_recordbatch::error::ExternalSnafu;
|
||||
use common_recordbatch::RecordBatchStreamAdaptor;
|
||||
use common_recordbatch::RecordBatchStreamWrapper;
|
||||
use common_telemetry::logging;
|
||||
use common_telemetry::tracing_context::W3cTrace;
|
||||
use futures_util::StreamExt;
|
||||
use prost::Message;
|
||||
use snafu::{ensure, ResultExt};
|
||||
@@ -46,6 +47,9 @@ pub struct Database {
|
||||
// The dbname follows the same naming rule as our mysql, postgres and http
// protocols. The server treats dbname with higher priority than catalog/schema.
|
||||
dbname: String,
|
||||
// The time zone indicates the time zone where the user is located.
|
||||
// Some queries need to be aware of the user's time zone to perform some specific actions.
|
||||
timezone: String,
|
||||
|
||||
client: Client,
|
||||
ctx: FlightContext,
|
||||
@@ -57,7 +61,8 @@ impl Database {
|
||||
Self {
|
||||
catalog: catalog.into(),
|
||||
schema: schema.into(),
|
||||
dbname: "".to_string(),
|
||||
dbname: String::default(),
|
||||
timezone: String::default(),
|
||||
client,
|
||||
ctx: FlightContext::default(),
|
||||
}
|
||||
@@ -72,8 +77,9 @@ impl Database {
|
||||
/// environment
|
||||
pub fn new_with_dbname(dbname: impl Into<String>, client: Client) -> Self {
|
||||
Self {
|
||||
catalog: "".to_string(),
|
||||
schema: "".to_string(),
|
||||
catalog: String::default(),
|
||||
schema: String::default(),
|
||||
timezone: String::default(),
|
||||
dbname: dbname.into(),
|
||||
client,
|
||||
ctx: FlightContext::default(),
|
||||
@@ -104,6 +110,14 @@ impl Database {
|
||||
self.dbname = dbname.into();
|
||||
}
|
||||
|
||||
pub fn timezone(&self) -> &String {
|
||||
&self.timezone
|
||||
}
|
||||
|
||||
pub fn set_timezone(&mut self, timezone: impl Into<String>) {
|
||||
self.timezone = timezone.into();
|
||||
}
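
    // Sketch (not part of the patch): using the new timezone accessors. The database name
    // and timezone value below are purely illustrative.
    //
    //     let mut db = Database::new_with_dbname("public", client);
    //     db.set_timezone("Asia/Shanghai");
    //     assert_eq!(db.timezone(), "Asia/Shanghai");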
|
||||
|
||||
pub fn set_auth(&mut self, auth: AuthScheme) {
|
||||
self.ctx.auth_header = Some(AuthHeader {
|
||||
auth_scheme: Some(auth),
|
||||
@@ -147,21 +161,22 @@ impl Database {
|
||||
|
||||
async fn handle(&self, request: Request) -> Result<u32> {
|
||||
let mut client = self.client.make_database_client()?.inner;
|
||||
let request = self.to_rpc_request(request, 0);
|
||||
let request = self.to_rpc_request(request);
|
||||
let response = client.handle(request).await?.into_inner();
|
||||
from_grpc_response(response)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_rpc_request(&self, request: Request, trace_id: u64) -> GreptimeRequest {
|
||||
fn to_rpc_request(&self, request: Request) -> GreptimeRequest {
|
||||
GreptimeRequest {
|
||||
header: Some(RequestHeader {
|
||||
catalog: self.catalog.clone(),
|
||||
schema: self.schema.clone(),
|
||||
authorization: self.ctx.auth_header.clone(),
|
||||
dbname: self.dbname.clone(),
|
||||
trace_id,
|
||||
span_id: 0,
|
||||
timezone: self.timezone.clone(),
|
||||
// TODO(Taylor-lagrange): add client grpc tracing
|
||||
tracing_context: W3cTrace::new(),
|
||||
}),
|
||||
request: Some(request),
|
||||
}
|
||||
@@ -172,23 +187,17 @@ impl Database {
|
||||
S: AsRef<str>,
|
||||
{
|
||||
let _timer = metrics::METRIC_GRPC_SQL.start_timer();
|
||||
self.do_get(
|
||||
Request::Query(QueryRequest {
|
||||
query: Some(Query::Sql(sql.as_ref().to_string())),
|
||||
}),
|
||||
0,
|
||||
)
|
||||
self.do_get(Request::Query(QueryRequest {
|
||||
query: Some(Query::Sql(sql.as_ref().to_string())),
|
||||
}))
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn logical_plan(&self, logical_plan: Vec<u8>, trace_id: u64) -> Result<Output> {
|
||||
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<Output> {
|
||||
let _timer = metrics::METRIC_GRPC_LOGICAL_PLAN.start_timer();
|
||||
self.do_get(
|
||||
Request::Query(QueryRequest {
|
||||
query: Some(Query::LogicalPlan(logical_plan)),
|
||||
}),
|
||||
trace_id,
|
||||
)
|
||||
self.do_get(Request::Query(QueryRequest {
|
||||
query: Some(Query::LogicalPlan(logical_plan)),
|
||||
}))
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -200,68 +209,53 @@ impl Database {
|
||||
step: &str,
|
||||
) -> Result<Output> {
|
||||
let _timer = metrics::METRIC_GRPC_PROMQL_RANGE_QUERY.start_timer();
|
||||
self.do_get(
|
||||
Request::Query(QueryRequest {
|
||||
query: Some(Query::PromRangeQuery(PromRangeQuery {
|
||||
query: promql.to_string(),
|
||||
start: start.to_string(),
|
||||
end: end.to_string(),
|
||||
step: step.to_string(),
|
||||
})),
|
||||
}),
|
||||
0,
|
||||
)
|
||||
self.do_get(Request::Query(QueryRequest {
|
||||
query: Some(Query::PromRangeQuery(PromRangeQuery {
|
||||
query: promql.to_string(),
|
||||
start: start.to_string(),
|
||||
end: end.to_string(),
|
||||
step: step.to_string(),
|
||||
})),
|
||||
}))
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> {
|
||||
let _timer = metrics::METRIC_GRPC_CREATE_TABLE.start_timer();
|
||||
self.do_get(
|
||||
Request::Ddl(DdlRequest {
|
||||
expr: Some(DdlExpr::CreateTable(expr)),
|
||||
}),
|
||||
0,
|
||||
)
|
||||
self.do_get(Request::Ddl(DdlRequest {
|
||||
expr: Some(DdlExpr::CreateTable(expr)),
|
||||
}))
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn alter(&self, expr: AlterExpr) -> Result<Output> {
|
||||
let _timer = metrics::METRIC_GRPC_ALTER.start_timer();
|
||||
self.do_get(
|
||||
Request::Ddl(DdlRequest {
|
||||
expr: Some(DdlExpr::Alter(expr)),
|
||||
}),
|
||||
0,
|
||||
)
|
||||
self.do_get(Request::Ddl(DdlRequest {
|
||||
expr: Some(DdlExpr::Alter(expr)),
|
||||
}))
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn drop_table(&self, expr: DropTableExpr) -> Result<Output> {
|
||||
let _timer = metrics::METRIC_GRPC_DROP_TABLE.start_timer();
|
||||
self.do_get(
|
||||
Request::Ddl(DdlRequest {
|
||||
expr: Some(DdlExpr::DropTable(expr)),
|
||||
}),
|
||||
0,
|
||||
)
|
||||
self.do_get(Request::Ddl(DdlRequest {
|
||||
expr: Some(DdlExpr::DropTable(expr)),
|
||||
}))
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn truncate_table(&self, expr: TruncateTableExpr) -> Result<Output> {
|
||||
let _timer = metrics::METRIC_GRPC_TRUNCATE_TABLE.start_timer();
|
||||
self.do_get(
|
||||
Request::Ddl(DdlRequest {
|
||||
expr: Some(DdlExpr::TruncateTable(expr)),
|
||||
}),
|
||||
0,
|
||||
)
|
||||
self.do_get(Request::Ddl(DdlRequest {
|
||||
expr: Some(DdlExpr::TruncateTable(expr)),
|
||||
}))
|
||||
.await
|
||||
}
|
||||
|
||||
async fn do_get(&self, request: Request, trace_id: u64) -> Result<Output> {
|
||||
async fn do_get(&self, request: Request) -> Result<Output> {
|
||||
// FIXME(paomian): should be added some labels for metrics
|
||||
let _timer = metrics::METRIC_GRPC_DO_GET.start_timer();
|
||||
let request = self.to_rpc_request(request, trace_id);
|
||||
let request = self.to_rpc_request(request);
|
||||
let request = Ticket {
|
||||
ticket: request.encode_to_vec().into(),
|
||||
};
|
||||
@@ -315,30 +309,36 @@ impl Database {
|
||||
);
|
||||
Ok(Output::AffectedRows(rows))
|
||||
}
|
||||
FlightMessage::Recordbatch(_) => IllegalFlightMessagesSnafu {
|
||||
reason: "The first flight message cannot be a RecordBatch message",
|
||||
FlightMessage::Recordbatch(_) | FlightMessage::Metrics(_) => {
|
||||
IllegalFlightMessagesSnafu {
|
||||
reason: "The first flight message cannot be a RecordBatch or Metrics message",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
.fail(),
|
||||
FlightMessage::Schema(schema) => {
|
||||
let stream = Box::pin(stream!({
|
||||
while let Some(flight_message) = flight_message_stream.next().await {
|
||||
let flight_message = flight_message
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
let FlightMessage::Recordbatch(record_batch) = flight_message else {
|
||||
yield IllegalFlightMessagesSnafu {reason: "A Schema message must be succeeded exclusively by a set of RecordBatch messages"}
|
||||
match flight_message {
|
||||
FlightMessage::Recordbatch(record_batch) => yield Ok(record_batch),
|
||||
FlightMessage::Metrics(_) => {}
|
||||
FlightMessage::AffectedRows(_) | FlightMessage::Schema(_) => {
|
||||
yield IllegalFlightMessagesSnafu {reason: format!("A Schema message must be succeeded exclusively by a set of RecordBatch messages, flight_message: {:?}", flight_message)}
|
||||
.fail()
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu);
|
||||
break;
|
||||
};
|
||||
yield Ok(record_batch);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}));
|
||||
let record_batch_stream = RecordBatchStreamAdaptor {
|
||||
let record_batch_stream = RecordBatchStreamWrapper {
|
||||
schema,
|
||||
stream,
|
||||
output_ordering: None,
|
||||
metrics: Default::default(),
|
||||
};
|
||||
Ok(Output::Stream(Box::pin(record_batch_stream)))
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ use std::any::Any;
|
||||
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_error::{GREPTIME_ERROR_CODE, GREPTIME_ERROR_MSG};
|
||||
use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG};
|
||||
use common_macro::stack_trace_debug;
|
||||
use snafu::{Location, Snafu};
|
||||
use tonic::{Code, Status};
|
||||
@@ -115,7 +115,7 @@ impl From<Status> for Error {
|
||||
.and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
|
||||
}
|
||||
|
||||
let code = get_metadata_value(&e, GREPTIME_ERROR_CODE)
|
||||
let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE)
|
||||
.and_then(|s| {
|
||||
if let Ok(code) = s.parse::<u32>() {
|
||||
StatusCode::from_u32(code)
|
||||
@@ -125,9 +125,21 @@ impl From<Status> for Error {
|
||||
})
|
||||
.unwrap_or(StatusCode::Unknown);
|
||||
|
||||
let msg =
|
||||
get_metadata_value(&e, GREPTIME_ERROR_MSG).unwrap_or_else(|| e.message().to_string());
|
||||
let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG)
|
||||
.unwrap_or_else(|| e.message().to_string());
|
||||
|
||||
Self::Server { code, msg }
|
||||
}
|
||||
}
|
||||
|
||||
impl Error {
|
||||
pub fn should_retry(&self) -> bool {
|
||||
!matches!(
|
||||
self,
|
||||
Self::RegionServer {
|
||||
code: Code::InvalidArgument,
|
||||
..
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,27 +17,30 @@ use prometheus::*;
|
||||
|
||||
lazy_static! {
|
||||
pub static ref METRIC_GRPC_CREATE_TABLE: Histogram =
|
||||
register_histogram!("grpc_create_table", "grpc create table").unwrap();
|
||||
pub static ref METRIC_GRPC_PROMQL_RANGE_QUERY: Histogram =
|
||||
register_histogram!("grpc_promql_range_query", "grpc promql range query").unwrap();
|
||||
register_histogram!("greptime_grpc_create_table", "grpc create table").unwrap();
|
||||
pub static ref METRIC_GRPC_PROMQL_RANGE_QUERY: Histogram = register_histogram!(
|
||||
"greptime_grpc_promql_range_query",
|
||||
"grpc promql range query"
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_GRPC_INSERT: Histogram =
|
||||
register_histogram!("grpc_insert", "grpc insert").unwrap();
|
||||
register_histogram!("greptime_grpc_insert", "grpc insert").unwrap();
|
||||
pub static ref METRIC_GRPC_DELETE: Histogram =
|
||||
register_histogram!("grpc_delete", "grpc delete").unwrap();
|
||||
register_histogram!("greptime_grpc_delete", "grpc delete").unwrap();
|
||||
pub static ref METRIC_GRPC_SQL: Histogram =
|
||||
register_histogram!("grpc_sql", "grpc sql").unwrap();
|
||||
register_histogram!("greptime_grpc_sql", "grpc sql").unwrap();
|
||||
pub static ref METRIC_GRPC_LOGICAL_PLAN: Histogram =
|
||||
register_histogram!("grpc_logical_plan", "grpc logical plan").unwrap();
|
||||
register_histogram!("greptime_grpc_logical_plan", "grpc logical plan").unwrap();
|
||||
pub static ref METRIC_GRPC_ALTER: Histogram =
|
||||
register_histogram!("grpc_alter", "grpc alter").unwrap();
|
||||
register_histogram!("greptime_grpc_alter", "grpc alter").unwrap();
|
||||
pub static ref METRIC_GRPC_DROP_TABLE: Histogram =
|
||||
register_histogram!("grpc_drop_table", "grpc drop table").unwrap();
|
||||
register_histogram!("greptime_grpc_drop_table", "grpc drop table").unwrap();
|
||||
pub static ref METRIC_GRPC_TRUNCATE_TABLE: Histogram =
|
||||
register_histogram!("grpc_truncate_table", "grpc truncate table").unwrap();
|
||||
register_histogram!("greptime_grpc_truncate_table", "grpc truncate table").unwrap();
|
||||
pub static ref METRIC_GRPC_DO_GET: Histogram =
|
||||
register_histogram!("grpc_do_get", "grpc do get").unwrap();
|
||||
register_histogram!("greptime_grpc_do_get", "grpc do get").unwrap();
|
||||
pub static ref METRIC_REGION_REQUEST_GRPC: HistogramVec = register_histogram_vec!(
|
||||
"grpc_region_request",
|
||||
"greptime_grpc_region_request",
|
||||
"grpc region request",
|
||||
&["request_type"]
|
||||
)
|
||||
|
||||
@@ -12,8 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use api::v1::region::{QueryRequest, RegionRequest, RegionResponse};
use api::v1::ResponseHeader;
use arc_swap::ArcSwapOption;
use arrow_flight::Ticket;
use async_stream::stream;
use async_trait::async_trait;
@@ -23,13 +26,13 @@ use common_grpc::flight::{FlightDecoder, FlightMessage};
use common_meta::datanode_manager::{AffectedRows, Datanode};
use common_meta::error::{self as meta_error, Result as MetaResult};
use common_recordbatch::error::ExternalSnafu;
use common_recordbatch::{RecordBatchStreamAdaptor, SendableRecordBatchStream};
use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
use common_telemetry::error;
use common_telemetry::tracing_context::TracingContext;
use prost::Message;
use snafu::{location, Location, OptionExt, ResultExt};
use tokio_stream::StreamExt;

use crate::error::Error::RegionServer;
use crate::error::{
self, ConvertFlightDataSnafu, IllegalDatabaseResponseSnafu, IllegalFlightMessagesSnafu,
MissingFieldSnafu, Result, ServerSnafu,
@@ -45,7 +48,7 @@ pub struct RegionRequester {
impl Datanode for RegionRequester {
async fn handle(&self, request: RegionRequest) -> MetaResult<AffectedRows> {
self.handle_inner(request).await.map_err(|err| {
if matches!(err, RegionServer { .. }) {
if err.should_retry() {
meta_error::Error::RetryLater {
source: BoxedError::new(err),
}
@@ -120,27 +123,43 @@ impl RegionRequester {
.fail();
};

let metrics_str = Arc::new(ArcSwapOption::from(None));
let ref_str = metrics_str.clone();

let tracing_context = TracingContext::from_current_span();

let stream = Box::pin(stream!({
let _span = tracing_context.attach(common_telemetry::tracing::info_span!(
"poll_flight_data_stream"
));
while let Some(flight_message) = flight_message_stream.next().await {
let flight_message = flight_message
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let FlightMessage::Recordbatch(record_batch) = flight_message else {
yield IllegalFlightMessagesSnafu {

match flight_message {
FlightMessage::Recordbatch(record_batch) => yield Ok(record_batch),
FlightMessage::Metrics(s) => {
ref_str.swap(Some(Arc::new(s)));
break;
}
_ => {
yield IllegalFlightMessagesSnafu {
reason: "A Schema message must be succeeded exclusively by a set of RecordBatch messages"
}
.fail()
.map_err(BoxedError::new)
.context(ExternalSnafu);
break;
};
yield Ok(record_batch);
break;
}
}
}
}));
let record_batch_stream = RecordBatchStreamAdaptor {
let record_batch_stream = RecordBatchStreamWrapper {
schema,
stream,
output_ordering: None,
metrics: metrics_str,
};
Ok(Box::pin(record_batch_stream))
}
@@ -231,7 +250,7 @@ mod test {
let result = check_response_header(Some(ResponseHeader {
status: Some(PbStatus {
status_code: StatusCode::Success as u32,
err_msg: "".to_string(),
err_msg: String::default(),
}),
}));
assert!(result.is_ok());
@@ -239,7 +258,7 @@ mod test {
let result = check_response_header(Some(ResponseHeader {
status: Some(PbStatus {
status_code: u32::MAX,
err_msg: "".to_string(),
err_msg: String::default(),
}),
}));
assert!(matches!(

@@ -39,7 +39,7 @@ use crate::from_grpc_response;
/// ```
///
/// If you want to see a concrete usage example, please see
/// [stream_inserter.rs](https://github.com/GreptimeTeam/greptimedb/blob/develop/src/client/examples/stream_ingest.rs).
/// [stream_inserter.rs](https://github.com/GreptimeTeam/greptimedb/blob/main/src/client/examples/stream_ingest.rs).
pub struct StreamInserter {
sender: mpsc::Sender<GreptimeRequest>,
@@ -18,7 +18,7 @@ async-trait.workspace = true
auth.workspace = true
catalog.workspace = true
chrono.workspace = true
clap = { version = "3.1", features = ["derive"] }
clap.workspace = true
client.workspace = true
common-base.workspace = true
common-catalog.workspace = true
@@ -29,9 +29,12 @@ common-meta.workspace = true
common-procedure.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
common-runtime.workspace = true
common-telemetry = { workspace = true, features = [
"deadlock_detection",
] }
common-time.workspace = true
common-wal.workspace = true
config = "0.13"
datanode.workspace = true
datatypes.workspace = true
@@ -40,6 +43,7 @@ etcd-client.workspace = true
file-engine.workspace = true
frontend.workspace = true
futures.workspace = true
human-panic = "1.2.2"
lazy_static.workspace = true
meta-client.workspace = true
meta-srv.workspace = true
@@ -58,6 +62,7 @@ serde_json.workspace = true
servers.workspace = true
session.workspace = true
snafu.workspace = true
store-api.workspace = true
substrait.workspace = true
table.workspace = true
tokio.workspace = true

@@ -13,5 +13,5 @@
// limitations under the License.

fn main() {
common_version::setup_git_versions();
common_version::setup_build_info();
}
@@ -16,79 +16,12 @@

use std::fmt;

use clap::Parser;
use clap::{FromArgMatches, Parser, Subcommand};
use cmd::error::Result;
use cmd::options::{Options, TopLevelOptions};
use cmd::{cli, datanode, frontend, metasrv, standalone};
use common_telemetry::logging::{error, info, TracingOptions};

lazy_static::lazy_static! {
static ref APP_VERSION: prometheus::IntGaugeVec =
prometheus::register_int_gauge_vec!("app_version", "app version", &["short_version", "version"]).unwrap();
}

#[derive(Parser)]
#[clap(name = "greptimedb", version = print_version())]
struct Command {
#[clap(long)]
log_dir: Option<String>,
#[clap(long)]
log_level: Option<String>,
#[clap(subcommand)]
subcmd: SubCommand,

#[cfg(feature = "tokio-console")]
#[clap(long)]
tokio_console_addr: Option<String>,
}

pub enum Application {
Datanode(datanode::Instance),
Frontend(frontend::Instance),
Metasrv(metasrv::Instance),
Standalone(standalone::Instance),
Cli(cli::Instance),
}

impl Application {
async fn start(&mut self) -> Result<()> {
match self {
Application::Datanode(instance) => instance.start().await,
Application::Frontend(instance) => instance.start().await,
Application::Metasrv(instance) => instance.start().await,
Application::Standalone(instance) => instance.start().await,
Application::Cli(instance) => instance.start().await,
}
}

async fn stop(&self) -> Result<()> {
match self {
Application::Datanode(instance) => instance.stop().await,
Application::Frontend(instance) => instance.stop().await,
Application::Metasrv(instance) => instance.stop().await,
Application::Standalone(instance) => instance.stop().await,
Application::Cli(instance) => instance.stop().await,
}
}
}

impl Command {
async fn build(self, opts: Options) -> Result<Application> {
self.subcmd.build(opts).await
}

fn load_options(&self) -> Result<Options> {
let top_level_opts = self.top_level_options();
self.subcmd.load_options(top_level_opts)
}

fn top_level_options(&self) -> TopLevelOptions {
TopLevelOptions {
log_dir: self.log_dir.clone(),
log_level: self.log_level.clone(),
}
}
}
use cmd::options::{CliOptions, Options};
use cmd::{
cli, datanode, frontend, greptimedb_cli, log_versions, metasrv, standalone, start_app, App,
};

#[derive(Parser)]
enum SubCommand {
@@ -105,40 +38,41 @@ enum SubCommand {
}

impl SubCommand {
async fn build(self, opts: Options) -> Result<Application> {
match (self, opts) {
async fn build(self, opts: Options) -> Result<Box<dyn App>> {
let app: Box<dyn App> = match (self, opts) {
(SubCommand::Datanode(cmd), Options::Datanode(dn_opts)) => {
let app = cmd.build(*dn_opts).await?;
Ok(Application::Datanode(app))
Box::new(app) as _
}
(SubCommand::Frontend(cmd), Options::Frontend(fe_opts)) => {
let app = cmd.build(*fe_opts).await?;
Ok(Application::Frontend(app))
Box::new(app) as _
}
(SubCommand::Metasrv(cmd), Options::Metasrv(meta_opts)) => {
let app = cmd.build(*meta_opts).await?;
Ok(Application::Metasrv(app))
Box::new(app) as _
}
(SubCommand::Standalone(cmd), Options::Standalone(opts)) => {
let app = cmd.build(*opts).await?;
Ok(Application::Standalone(app))
Box::new(app) as _
}
(SubCommand::Cli(cmd), Options::Cli(_)) => {
let app = cmd.build().await?;
Ok(Application::Cli(app))
Box::new(app) as _
}

_ => unreachable!(),
}
};
Ok(app)
}

fn load_options(&self, top_level_opts: TopLevelOptions) -> Result<Options> {
fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
match self {
SubCommand::Datanode(cmd) => cmd.load_options(top_level_opts),
SubCommand::Frontend(cmd) => cmd.load_options(top_level_opts),
SubCommand::Metasrv(cmd) => cmd.load_options(top_level_opts),
SubCommand::Standalone(cmd) => cmd.load_options(top_level_opts),
SubCommand::Cli(cmd) => cmd.load_options(top_level_opts),
SubCommand::Datanode(cmd) => cmd.load_options(cli_options),
SubCommand::Frontend(cmd) => cmd.load_options(cli_options),
SubCommand::Metasrv(cmd) => cmd.load_options(cli_options),
SubCommand::Standalone(cmd) => cmd.load_options(cli_options),
SubCommand::Cli(cmd) => cmd.load_options(cli_options),
}
}
}
@@ -155,89 +89,49 @@ impl fmt::Display for SubCommand {
}
}

fn print_version() -> &'static str {
concat!(
"\nbranch: ",
env!("GIT_BRANCH"),
"\ncommit: ",
env!("GIT_COMMIT"),
"\ndirty: ",
env!("GIT_DIRTY"),
"\nversion: ",
env!("CARGO_PKG_VERSION")
)
}

fn short_version() -> &'static str {
env!("CARGO_PKG_VERSION")
}

// {app_name}-{branch_name}-{commit_short}
// The branch name (tag) of a release build should already contain the short
// version so the full version doesn't concat the short version explicitly.
fn full_version() -> &'static str {
concat!(
"greptimedb-",
env!("GIT_BRANCH"),
"-",
env!("GIT_COMMIT_SHORT")
)
}

fn log_env_flags() {
info!("command line arguments");
for argument in std::env::args() {
info!("argument: {}", argument);
}
}

#[cfg(not(windows))]
#[global_allocator]
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

#[tokio::main]
async fn main() -> Result<()> {
let cmd = Command::parse();
let app_name = &cmd.subcmd.to_string();

let opts = cmd.load_options()?;
let logging_opts = opts.logging_options();
let tracing_opts = TracingOptions {
#[cfg(feature = "tokio-console")]
tokio_console_addr: cmd.tokio_console_addr.clone(),
let metadata = human_panic::Metadata {
version: env!("CARGO_PKG_VERSION").into(),
name: "GreptimeDB".into(),
authors: Default::default(),
homepage: "https://github.com/GreptimeTeam/greptimedb/discussions".into(),
};
human_panic::setup_panic!(metadata);

common_telemetry::set_panic_hook();
let _guard = common_telemetry::init_global_logging(app_name, logging_opts, tracing_opts);

// Report app version as gauge.
APP_VERSION
.with_label_values(&[short_version(), full_version()])
.inc();
let cli = greptimedb_cli();

// Log version and argument flags.
info!(
"short_version: {}, full_version: {}",
short_version(),
full_version()
let cli = SubCommand::augment_subcommands(cli);

let args = cli.get_matches();

let subcmd = match SubCommand::from_arg_matches(&args) {
Ok(subcmd) => subcmd,
Err(e) => e.exit(),
};

let app_name = subcmd.to_string();

let cli_options = CliOptions::new(&args);

let opts = subcmd.load_options(&cli_options)?;

let _guard = common_telemetry::init_global_logging(
&app_name,
opts.logging_options(),
cli_options.tracing_options(),
opts.node_id(),
);
log_env_flags();

let mut app = cmd.build(opts).await?;
log_versions();

tokio::select! {
result = app.start() => {
if let Err(err) = result {
error!(err; "Fatal error occurs!");
}
}
_ = tokio::signal::ctrl_c() => {
if let Err(err) = app.stop().await {
error!(err; "Fatal error occurs!");
}
info!("Goodbye!");
}
}
let app = subcmd.build(opts).await?;

Ok(())
start_app(app).await
}
@@ -13,9 +13,15 @@
// limitations under the License.

mod bench;

// Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373
#[allow(unused)]
mod cmd;
mod export;
mod helper;

// Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373
#[allow(unused)]
mod repl;
// TODO(weny): Removes it
#[allow(deprecated)]
@@ -30,27 +36,35 @@ use upgrade::UpgradeCommand;

use self::export::ExportCommand;
use crate::error::Result;
use crate::options::{Options, TopLevelOptions};
use crate::options::{CliOptions, Options};
use crate::App;

#[async_trait]
pub trait Tool {
pub trait Tool: Send + Sync {
async fn do_work(&self) -> Result<()>;
}

pub enum Instance {
Repl(Repl),
Tool(Box<dyn Tool>),
pub struct Instance {
tool: Box<dyn Tool>,
}

impl Instance {
pub async fn start(&mut self) -> Result<()> {
match self {
Instance::Repl(repl) => repl.run().await,
Instance::Tool(tool) => tool.do_work().await,
}
fn new(tool: Box<dyn Tool>) -> Self {
Self { tool }
}
}

#[async_trait]
impl App for Instance {
fn name(&self) -> &str {
"greptime-cli"
}

pub async fn stop(&self) -> Result<()> {
async fn start(&mut self) -> Result<()> {
self.tool.do_work().await
}

async fn stop(&self) -> Result<()> {
Ok(())
}
}
@@ -66,14 +80,15 @@ impl Command {
self.cmd.build().await
}

pub fn load_options(&self, top_level_opts: TopLevelOptions) -> Result<Options> {
pub fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
let mut logging_opts = LoggingOptions::default();
if let Some(dir) = top_level_opts.log_dir {
logging_opts.dir = dir;
}
if top_level_opts.log_level.is_some() {
logging_opts.level = top_level_opts.log_level;

if let Some(dir) = &cli_options.log_dir {
logging_opts.dir = dir.clone();
}

logging_opts.level = cli_options.log_level.clone();

Ok(Options::Cli(Box::new(logging_opts)))
}
}
@@ -110,7 +125,6 @@ pub(crate) struct AttachCommand {
impl AttachCommand {
#[allow(dead_code)]
async fn build(self) -> Result<Instance> {
let repl = Repl::try_new(&self).await?;
Ok(Instance::Repl(repl))
unimplemented!("Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373")
}
}
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::BTreeMap;
use std::collections::{BTreeMap, HashMap};
use std::future::Future;
use std::sync::Arc;
use std::time::Duration;
@@ -28,6 +28,7 @@ use common_telemetry::info;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema};
use rand::Rng;
use store_api::storage::RegionNumber;
use table::metadata::{RawTableInfo, RawTableMeta, TableId, TableIdent, TableType};

use self::metadata::TableMetadataBencher;
@@ -69,7 +70,7 @@ impl BenchTableMetadataCommand {
table_metadata_manager,
count: self.count,
};
Ok(Instance::Tool(Box::new(tool)))
Ok(Instance::new(Box::new(tool)))
}
}

@@ -137,12 +138,12 @@ fn create_table_info(table_id: TableId, table_name: TableName) -> RawTableInfo {
}
}

fn create_region_routes() -> Vec<RegionRoute> {
let mut regions = Vec::with_capacity(100);
fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
let mut region_routes = Vec::with_capacity(100);
let mut rng = rand::thread_rng();

for region_id in 0..64u64 {
regions.push(RegionRoute {
for region_id in regions.into_iter().map(u64::from) {
region_routes.push(RegionRoute {
region: Region {
id: region_id.into(),
name: String::new(),
@@ -155,8 +156,15 @@ fn create_region_routes() -> Vec<RegionRoute> {
}),
follower_peers: vec![],
leader_status: None,
leader_down_since: None,
});
}

regions
region_routes
}

fn create_region_wal_options(regions: Vec<RegionNumber>) -> HashMap<RegionNumber, String> {
// TODO(niebayes): construct region wal options for benchmark.
let _ = regions;
HashMap::default()
}

@@ -14,10 +14,13 @@

use std::time::Instant;

use common_meta::key::table_route::TableRouteValue;
use common_meta::key::TableMetadataManagerRef;
use common_meta::table_name::TableName;

use super::{bench_self_recorded, create_region_routes, create_table_info};
use crate::cli::bench::{
bench_self_recorded, create_region_routes, create_region_wal_options, create_table_info,
};

pub struct TableMetadataBencher {
table_metadata_manager: TableMetadataManagerRef,
@@ -43,12 +46,19 @@ impl TableMetadataBencher {
let table_name = format!("bench_table_name_{}", i);
let table_name = TableName::new("bench_catalog", "bench_schema", table_name);
let table_info = create_table_info(i, table_name);
let region_routes = create_region_routes();

let regions: Vec<_> = (0..64).collect();
let region_routes = create_region_routes(regions.clone());
let region_wal_options = create_region_wal_options(regions);

let start = Instant::now();

self.table_metadata_manager
.create_table_metadata(table_info, region_routes)
.create_table_metadata(
table_info,
TableRouteValue::physical(region_routes),
region_wal_options,
)
.await
.unwrap();

@@ -58,8 +58,8 @@ pub struct ExportCommand {
#[clap(long)]
output_dir: String,

/// The name of the catalog to export. Default to "greptime-*"".
#[clap(long, default_value = "")]
/// The name of the catalog to export.
#[clap(long, default_value = "greptime-*")]
database: String,

/// Parallelism of the export.
@@ -105,7 +105,7 @@ impl ExportCommand {
}));
}

Ok(Instance::Tool(Box::new(Export {
Ok(Instance::new(Box::new(Export {
client: database_client,
catalog,
schema,
@@ -16,8 +16,9 @@ use std::path::PathBuf;
use std::sync::Arc;
use std::time::Instant;

use catalog::kvbackend::{CachedMetaKvBackend, KvBackendCatalogManager};
use client::client_manager::DatanodeClients;
use catalog::kvbackend::{
CachedMetaKvBackend, CachedMetaKvBackendBuilder, KvBackendCatalogManager,
};
use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_base::Plugins;
use common_error::ext::ErrorExt;
@@ -158,25 +159,26 @@ impl Repl {
let start = Instant::now();

let output = if let Some(query_engine) = &self.query_engine {
let stmt = QueryLanguageParser::parse_sql(&sql)
.with_context(|_| ParseSqlSnafu { sql: sql.clone() })?;

let query_ctx = QueryContext::with(self.database.catalog(), self.database.schema());

let stmt = QueryLanguageParser::parse_sql(&sql, &query_ctx)
.with_context(|_| ParseSqlSnafu { sql: sql.clone() })?;

let plan = query_engine
.planner()
.plan(stmt, query_ctx)
.plan(stmt, query_ctx.clone())
.await
.context(PlanStatementSnafu)?;

let LogicalPlan::DfPlan(plan) =
query_engine.optimize(&plan).context(PlanStatementSnafu)?;
let LogicalPlan::DfPlan(plan) = query_engine
.optimize(&query_engine.engine_context(query_ctx), &plan)
.context(PlanStatementSnafu)?;

let plan = DFLogicalSubstraitConvertor {}
.encode(&plan)
.context(SubstraitEncodeLogicalPlanSnafu)?;

self.database.logical_plan(plan.to_vec(), 0).await
self.database.logical_plan(plan.to_vec()).await
} else {
self.database.sql(&sql).await
}
@@ -248,15 +250,11 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
.context(StartMetaClientSnafu)?;
let meta_client = Arc::new(meta_client);

let cached_meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
let cached_meta_backend =
Arc::new(CachedMetaKvBackendBuilder::new(meta_client.clone()).build());

let datanode_clients = Arc::new(DatanodeClients::default());

let catalog_list = KvBackendCatalogManager::new(
cached_meta_backend.clone(),
cached_meta_backend.clone(),
datanode_clients,
);
let catalog_list =
KvBackendCatalogManager::new(cached_meta_backend.clone(), cached_meta_backend);
let plugins: Plugins = Default::default();
let state = Arc::new(QueryEngineState::new(
catalog_list,

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::sync::Arc;

use async_trait::async_trait;
@@ -26,7 +27,7 @@ use common_meta::key::table_info::{TableInfoKey, TableInfoValue};
use common_meta::key::table_name::{TableNameKey, TableNameValue};
use common_meta::key::table_region::{TableRegionKey, TableRegionValue};
use common_meta::key::table_route::{TableRouteKey, TableRouteValue as NextTableRouteValue};
use common_meta::key::{RegionDistribution, TableMetaKey};
use common_meta::key::{RegionDistribution, TableMetaKey, TableMetaValue};
use common_meta::kv_backend::etcd::EtcdStore;
use common_meta::kv_backend::KvBackendRef;
use common_meta::range_stream::PaginationStream;
@@ -76,7 +77,7 @@ impl UpgradeCommand {
skip_schema_keys: self.skip_schema_keys,
skip_table_route_keys: self.skip_table_route_keys,
};
Ok(Instance::Tool(Box::new(tool)))
Ok(Instance::new(Box::new(tool)))
}
}

@@ -152,7 +153,7 @@ impl MigrateTableMetadata {
)
.unwrap();

let new_table_value = NextTableRouteValue::new(table_route.region_routes);
let new_table_value = NextTableRouteValue::physical(table_route.region_routes);

let table_id = table_route.table.id as u32;
let new_key = TableRouteKey::new(table_id);
@@ -395,6 +396,9 @@ impl MigrateTableMetadata {
let region_distribution: RegionDistribution =
value.regions_id_map.clone().into_iter().collect();

// TODO(niebayes): properly fetch or construct wal options.
let region_wal_options = HashMap::default();

let datanode_table_kvs = region_distribution
.into_iter()
.map(|(datanode_id, regions)| {
@@ -409,6 +413,7 @@ impl MigrateTableMetadata {
engine: engine.to_string(),
region_storage_path: region_storage_path.clone(),
region_options: (&value.table_info.meta.options).into(),
region_wal_options: region_wal_options.clone(),
},
),
)
@@ -12,25 +12,46 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;
use std::time::Duration;

use async_trait::async_trait;
use catalog::kvbackend::MetaKvBackend;
use clap::Parser;
use common_telemetry::logging;
use common_telemetry::{info, logging};
use common_wal::config::DatanodeWalConfig;
use datanode::config::DatanodeOptions;
use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::service::DatanodeServiceBuilder;
use meta_client::MetaClientOptions;
use servers::Mode;
use snafu::ResultExt;
use snafu::{OptionExt, ResultExt};

use crate::error::{MissingConfigSnafu, Result, ShutdownDatanodeSnafu, StartDatanodeSnafu};
use crate::options::{Options, TopLevelOptions};
use crate::options::{CliOptions, Options};
use crate::App;

pub struct Instance {
datanode: Datanode,
}

impl Instance {
pub async fn start(&mut self) -> Result<()> {
pub fn new(datanode: Datanode) -> Self {
Self { datanode }
}

pub fn datanode_mut(&mut self) -> &mut Datanode {
&mut self.datanode
}
}

#[async_trait]
impl App for Instance {
fn name(&self) -> &str {
"greptime-datanode"
}

async fn start(&mut self) -> Result<()> {
plugins::start_datanode_plugins(self.datanode.plugins())
.await
.context(StartDatanodeSnafu)?;
@@ -38,7 +59,7 @@ impl Instance {
self.datanode.start().await.context(StartDatanodeSnafu)
}

pub async fn stop(&self) -> Result<()> {
async fn stop(&self) -> Result<()> {
self.datanode
.shutdown()
.await
@@ -57,8 +78,8 @@ impl Command {
self.subcmd.build(opts).await
}

pub fn load_options(&self, top_level_opts: TopLevelOptions) -> Result<Options> {
self.subcmd.load_options(top_level_opts)
pub fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
self.subcmd.load_options(cli_options)
}
}

@@ -74,9 +95,9 @@ impl SubCommand {
}
}

fn load_options(&self, top_level_opts: TopLevelOptions) -> Result<Options> {
fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
match self {
SubCommand::Start(cmd) => cmd.load_options(top_level_opts),
SubCommand::Start(cmd) => cmd.load_options(cli_options),
}
}
}
@@ -89,7 +110,7 @@ struct StartCommand {
rpc_addr: Option<String>,
#[clap(long)]
rpc_hostname: Option<String>,
#[clap(long, multiple = true, value_delimiter = ',')]
#[clap(long, value_delimiter = ',', num_args = 1..)]
metasrv_addr: Option<Vec<String>>,
#[clap(short, long)]
config_file: Option<String>,
@@ -106,19 +127,19 @@ struct StartCommand {
}

impl StartCommand {
fn load_options(&self, top_level_opts: TopLevelOptions) -> Result<Options> {
fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
let mut opts: DatanodeOptions = Options::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
DatanodeOptions::env_list_keys(),
)?;

if let Some(dir) = top_level_opts.log_dir {
opts.logging.dir = dir;
if let Some(dir) = &cli_options.log_dir {
opts.logging.dir = dir.clone();
}

if top_level_opts.log_level.is_some() {
opts.logging.level = top_level_opts.log_level;
if cli_options.log_level.is_some() {
opts.logging.level = cli_options.log_level.clone();
}

if let Some(addr) = &self.rpc_addr {
@@ -151,8 +172,18 @@ impl StartCommand {
opts.storage.data_home = data_home.clone();
}

if let Some(wal_dir) = &self.wal_dir {
opts.wal.dir = Some(wal_dir.clone());
// `wal_dir` only affects raft-engine config.
if let Some(wal_dir) = &self.wal_dir
&& let DatanodeWalConfig::RaftEngine(raft_engine_config) = &mut opts.wal
{
if raft_engine_config
.dir
.as_ref()
.is_some_and(|original_dir| original_dir != wal_dir)
{
info!("The wal dir of raft-engine is altered to {wal_dir}");
}
raft_engine_config.dir.replace(wal_dir.clone());
}

if let Some(http_addr) = &self.http_addr {
@@ -177,12 +208,37 @@ impl StartCommand {
logging::info!("Datanode start command: {:#?}", self);
logging::info!("Datanode options: {:#?}", opts);

let datanode = DatanodeBuilder::new(opts, None, plugins)
let node_id = opts
.node_id
.context(MissingConfigSnafu { msg: "'node_id'" })?;

let meta_config = opts.meta_client.as_ref().context(MissingConfigSnafu {
msg: "'meta_client_options'",
})?;

let meta_client = datanode::heartbeat::new_metasrv_client(node_id, meta_config)
.await
.context(StartDatanodeSnafu)?;

let meta_backend = Arc::new(MetaKvBackend {
client: Arc::new(meta_client.clone()),
});

let mut datanode = DatanodeBuilder::new(opts.clone(), plugins)
.with_meta_client(meta_client)
.with_kv_backend(meta_backend)
.build()
.await
.context(StartDatanodeSnafu)?;

Ok(Instance { datanode })
let services = DatanodeServiceBuilder::new(&opts)
.with_default_grpc_server(&datanode.region_server())
.enable_http_service()
.build()
.context(StartDatanodeSnafu)?;
datanode.setup_services(services);

Ok(Instance::new(datanode))
}
}
@@ -192,12 +248,12 @@ mod tests {
use std::time::Duration;

use common_test_util::temp_dir::create_named_temp_file;
use datanode::config::{CompactionConfig, FileConfig, ObjectStoreConfig, RegionManifestConfig};
use datanode::config::{FileConfig, GcsConfig, ObjectStoreConfig, S3Config};
use servers::heartbeat_options::HeartbeatOptions;
use servers::Mode;

use super::*;
use crate::options::ENV_VAR_SEP;
use crate::options::{CliOptions, ENV_VAR_SEP};

#[test]
fn test_read_from_config_file() {
@@ -221,6 +277,7 @@ mod tests {
tcp_nodelay = true

[wal]
provider = "raft_engine"
dir = "/other/wal"
file_size = "1GB"
purge_threshold = "50GB"
@@ -229,18 +286,17 @@ mod tests {
sync_write = false

[storage]
type = "File"
data_home = "/tmp/greptimedb/"
type = "File"

[storage.compaction]
max_inflight_tasks = 3
max_files_in_level0 = 7
max_purge_tasks = 32
[[storage.providers]]
type = "Gcs"
bucket = "foo"
endpoint = "bar"

[storage.manifest]
checkpoint_margin = 9
gc_duration = '7s'
compress = true
[[storage.providers]]
type = "S3"
bucket = "foo"

[logging]
level = "debug"
@@ -253,19 +309,24 @@ mod tests {
..Default::default()
};

let Options::Datanode(options) = cmd.load_options(TopLevelOptions::default()).unwrap()
else {
let Options::Datanode(options) = cmd.load_options(&CliOptions::default()).unwrap() else {
unreachable!()
};

assert_eq!("127.0.0.1:3001".to_string(), options.rpc_addr);
assert_eq!(Some(42), options.node_id);
assert_eq!("/other/wal", options.wal.dir.unwrap());

assert_eq!(Duration::from_secs(600), options.wal.purge_interval);
assert_eq!(1024 * 1024 * 1024, options.wal.file_size.0);
assert_eq!(1024 * 1024 * 1024 * 50, options.wal.purge_threshold.0);
assert!(!options.wal.sync_write);
let DatanodeWalConfig::RaftEngine(raft_engine_config) = options.wal else {
unreachable!()
};
assert_eq!("/other/wal", raft_engine_config.dir.unwrap());
assert_eq!(Duration::from_secs(600), raft_engine_config.purge_interval);
assert_eq!(1024 * 1024 * 1024, raft_engine_config.file_size.0);
assert_eq!(
1024 * 1024 * 1024 * 50,
raft_engine_config.purge_threshold.0
);
assert!(!raft_engine_config.sync_write);

let HeartbeatOptions {
interval: heart_beat_interval,
@@ -293,23 +354,15 @@ mod tests {
&options.storage.store,
ObjectStoreConfig::File(FileConfig { .. })
));

assert_eq!(
CompactionConfig {
max_inflight_tasks: 3,
max_files_in_level0: 7,
max_purge_tasks: 32,
},
options.storage.compaction,
);
assert_eq!(
RegionManifestConfig {
checkpoint_margin: Some(9),
gc_duration: Some(Duration::from_secs(7)),
compress: true
},
options.storage.manifest,
);
assert_eq!(options.storage.providers.len(), 2);
assert!(matches!(
options.storage.providers[0],
ObjectStoreConfig::Gcs(GcsConfig { .. })
));
assert!(matches!(
options.storage.providers[1],
ObjectStoreConfig::S3(S3Config { .. })
));

assert_eq!("debug", options.logging.level.unwrap());
assert_eq!("/tmp/greptimedb/test/logs".to_string(), options.logging.dir);
@@ -318,7 +371,7 @@ mod tests {
#[test]
fn test_try_from_cmd() {
if let Options::Datanode(opt) = StartCommand::default()
.load_options(TopLevelOptions::default())
.load_options(&CliOptions::default())
.unwrap()
{
assert_eq!(Mode::Standalone, opt.mode)
@@ -329,7 +382,7 @@ mod tests {
metasrv_addr: Some(vec!["127.0.0.1:3002".to_string()]),
..Default::default()
})
.load_options(TopLevelOptions::default())
.load_options(&CliOptions::default())
.unwrap()
{
assert_eq!(Mode::Distributed, opt.mode)
@@ -339,7 +392,7 @@ mod tests {
metasrv_addr: Some(vec!["127.0.0.1:3002".to_string()]),
..Default::default()
})
.load_options(TopLevelOptions::default())
.load_options(&CliOptions::default())
.is_err());

// Providing node_id but leave metasrv_addr absent is ok since metasrv_addr has default value
@@ -347,18 +400,21 @@ mod tests {
node_id: Some(42),
..Default::default()
})
.load_options(TopLevelOptions::default())
.load_options(&CliOptions::default())
.is_ok());
}

#[test]
fn test_top_level_options() {
fn test_load_log_options_from_cli() {
let cmd = StartCommand::default();

let options = cmd
.load_options(TopLevelOptions {
.load_options(&CliOptions {
log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
log_level: Some("debug".to_string()),

#[cfg(feature = "tokio-console")]
tokio_console_addr: None,
})
.unwrap();

@@ -384,21 +440,16 @@ mod tests {
tcp_nodelay = true

[wal]
provider = "raft_engine"
file_size = "1GB"
purge_threshold = "50GB"
purge_interval = "10m"
read_batch_size = 128
purge_interval = "5m"
sync_write = false

[storage]
type = "File"
data_home = "/tmp/greptimedb/"

[storage.compaction]
max_inflight_tasks = 3
max_files_in_level0 = 7
max_purge_tasks = 32

[logging]
level = "debug"
dir = "/tmp/greptimedb/test/logs"
@@ -409,26 +460,24 @@ mod tests {
temp_env::with_vars(
[
(
// storage.manifest.gc_duration = 9s
// wal.purge_interval = 1m
[
env_prefix.to_string(),
"storage".to_uppercase(),
"manifest".to_uppercase(),
"gc_duration".to_uppercase(),
"wal".to_uppercase(),
"purge_interval".to_uppercase(),
]
.join(ENV_VAR_SEP),
Some("9s"),
Some("1m"),
),
(
// storage.compaction.max_purge_tasks = 99
// wal.read_batch_size = 100
[
env_prefix.to_string(),
"storage".to_uppercase(),
"compaction".to_uppercase(),
"max_purge_tasks".to_uppercase(),
"wal".to_uppercase(),
"read_batch_size".to_uppercase(),
]
.join(ENV_VAR_SEP),
Some("99"),
Some("100"),
),
(
// meta_client.metasrv_addrs = 127.0.0.1:3001,127.0.0.1:3002,127.0.0.1:3003
@@ -449,17 +498,16 @@ mod tests {
..Default::default()
};

let Options::Datanode(opts) =
command.load_options(TopLevelOptions::default()).unwrap()
let Options::Datanode(opts) = command.load_options(&CliOptions::default()).unwrap()
else {
unreachable!()
};

// Should be read from env, env > default values.
assert_eq!(
opts.storage.manifest.gc_duration,
Some(Duration::from_secs(9))
);
let DatanodeWalConfig::RaftEngine(raft_engine_config) = opts.wal else {
unreachable!()
};
assert_eq!(raft_engine_config.read_batch_size, 100);
assert_eq!(
opts.meta_client.unwrap().metasrv_addrs,
vec![
@@ -470,19 +518,16 @@ mod tests {
);

// Should be read from config file, config file > env > default values.
assert_eq!(opts.storage.compaction.max_purge_tasks, 32);
assert_eq!(
raft_engine_config.purge_interval,
Duration::from_secs(60 * 5)
);

// Should be read from cli, cli > config file > env > default values.
assert_eq!(opts.wal.dir.unwrap(), "/other/wal/dir");
assert_eq!(raft_engine_config.dir.unwrap(), "/other/wal/dir");

// Should be default value.
assert_eq!(
opts.storage.manifest.checkpoint_margin,
DatanodeOptions::default()
.storage
.manifest
.checkpoint_margin
);
assert_eq!(opts.http.addr, DatanodeOptions::default().http.addr);
},
);
}
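For reference, the environment keys exercised in the test above are composed as prefix + section + field joined by ENV_VAR_SEP; the concrete separator is not visible in this hunk, so its value below is an assumption. An illustrative sketch of how such a key is spelled:

// Illustration only: composing a layered-config env key such as wal.purge_interval.
// ENV_VAR_SEP's actual value lives in crate::options; "__" is assumed here.
let env_var_sep = "__";
let key = [
    "GREPTIMEDB_DATANODE".to_string(),
    "wal".to_uppercase(),
    "purge_interval".to_uppercase(),
]
.join(env_var_sep);
assert_eq!(key, "GREPTIMEDB_DATANODE__WAL__PURGE_INTERVAL");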
@@ -14,7 +14,7 @@

use std::any::Any;

use common_error::ext::ErrorExt;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use config::ConfigError;
@@ -37,6 +37,18 @@ pub enum Error {
source: common_meta::error::Error,
},

#[snafu(display("Failed to init DDL manager"))]
InitDdlManager {
location: Location,
source: common_meta::error::Error,
},

#[snafu(display("Failed to init default timezone"))]
InitTimezone {
location: Location,
source: common_time::error::Error,
},

#[snafu(display("Failed to start procedure manager"))]
StartProcedureManager {
location: Location,
@@ -49,6 +61,12 @@ pub enum Error {
source: common_procedure::error::Error,
},

#[snafu(display("Failed to start wal options allocator"))]
StartWalOptionsAllocator {
location: Location,
source: common_meta::error::Error,
},

#[snafu(display("Failed to start datanode"))]
StartDatanode {
location: Location,
@@ -225,6 +243,18 @@ pub enum Error {
#[snafu(source)]
error: std::io::Error,
},

#[snafu(display("Other error"))]
Other {
source: BoxedError,
location: Location,
},

#[snafu(display("Failed to build runtime"))]
BuildRuntime {
location: Location,
source: common_runtime::error::Error,
},
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -240,21 +270,26 @@ impl ErrorExt for Error {
Error::ShutdownMetaServer { source, .. } => source.status_code(),
Error::BuildMetaServer { source, .. } => source.status_code(),
Error::UnsupportedSelectorType { source, .. } => source.status_code(),
Error::IterStream { source, .. } | Error::InitMetadata { source, .. } => {
source.status_code()
}

Error::IterStream { source, .. }
| Error::InitMetadata { source, .. }
| Error::InitDdlManager { source, .. } => source.status_code(),

Error::ConnectServer { source, .. } => source.status_code(),
Error::MissingConfig { .. }
| Error::LoadLayeredConfig { .. }
| Error::IllegalConfig { .. }
| Error::InvalidReplCommand { .. }
| Error::InitTimezone { .. }
| Error::ConnectEtcd { .. }
| Error::NotDataFromOutput { .. }
| Error::CreateDir { .. }
| Error::EmptyResult { .. }
| Error::InvalidDatabaseName { .. } => StatusCode::InvalidArguments,

Error::StartProcedureManager { source, .. }
| Error::StopProcedureManager { source, .. } => source.status_code(),
Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
Error::ReplCreation { .. } | Error::Readline { .. } => StatusCode::Internal,
Error::RequestDatabase { source, .. } => source.status_code(),
Error::CollectRecordBatches { source, .. }
@@ -267,6 +302,10 @@ impl ErrorExt for Error {
Error::StartCatalogManager { source, .. } => source.status_code(),

Error::SerdeJson { .. } | Error::FileIo { .. } => StatusCode::Unexpected,

Error::Other { source, .. } => source.status_code(),

Error::BuildRuntime { source, .. } => source.status_code(),
}
}
@@ -12,26 +12,53 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;
use std::time::Duration;

use async_trait::async_trait;
use catalog::kvbackend::CachedMetaKvBackendBuilder;
use clap::Parser;
use client::client_manager::DatanodeClients;
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::heartbeat::handler::HandlerGroupExecutor;
use common_telemetry::logging;
use common_time::timezone::set_default_timezone;
use frontend::frontend::FrontendOptions;
use frontend::heartbeat::handler::invalidate_table_cache::InvalidateTableCacheHandler;
use frontend::heartbeat::HeartbeatTask;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance};
use frontend::server::Services;
use meta_client::MetaClientOptions;
use servers::tls::{TlsMode, TlsOption};
use servers::Mode;
use snafu::ResultExt;
use snafu::{OptionExt, ResultExt};

use crate::error::{self, Result, StartFrontendSnafu};
use crate::options::{Options, TopLevelOptions};
use crate::error::{self, InitTimezoneSnafu, MissingConfigSnafu, Result, StartFrontendSnafu};
use crate::options::{CliOptions, Options};
use crate::App;

pub struct Instance {
frontend: FeInstance,
}

impl Instance {
pub async fn start(&mut self) -> Result<()> {
fn new(frontend: FeInstance) -> Self {
Self { frontend }
}

pub fn mut_inner(&mut self) -> &mut FeInstance {
&mut self.frontend
}
}

#[async_trait]
impl App for Instance {
fn name(&self) -> &str {
"greptime-frontend"
}

async fn start(&mut self) -> Result<()> {
plugins::start_frontend_plugins(self.frontend.plugins().clone())
.await
.context(StartFrontendSnafu)?;
@@ -39,7 +66,7 @@ impl Instance {
self.frontend.start().await.context(StartFrontendSnafu)
}

pub async fn stop(&self) -> Result<()> {
async fn stop(&self) -> Result<()> {
self.frontend
.shutdown()
.await
@@ -58,8 +85,8 @@ impl Command {
self.subcmd.build(opts).await
}

pub fn load_options(&self, top_level_opts: TopLevelOptions) -> Result<Options> {
self.subcmd.load_options(top_level_opts)
pub fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
self.subcmd.load_options(cli_options)
}
}

@@ -75,9 +102,9 @@ impl SubCommand {
}
}

fn load_options(&self, top_level_opts: TopLevelOptions) -> Result<Options> {
fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
match self {
SubCommand::Start(cmd) => cmd.load_options(top_level_opts),
SubCommand::Start(cmd) => cmd.load_options(cli_options),
}
}
}
@@ -100,7 +127,7 @@ pub struct StartCommand {
config_file: Option<String>,
#[clap(short, long)]
influxdb_enable: Option<bool>,
#[clap(long, multiple = true, value_delimiter = ',')]
#[clap(long, value_delimiter = ',', num_args = 1..)]
metasrv_addr: Option<Vec<String>>,
#[clap(long)]
tls_mode: Option<TlsMode>,
@@ -117,19 +144,19 @@ pub struct StartCommand {
}

impl StartCommand {
fn load_options(&self, top_level_opts: TopLevelOptions) -> Result<Options> {
fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
let mut opts: FrontendOptions = Options::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
FrontendOptions::env_list_keys(),
)?;

if let Some(dir) = top_level_opts.log_dir {
opts.logging.dir = dir;
if let Some(dir) = &cli_options.log_dir {
opts.logging.dir = dir.clone();
}

if top_level_opts.log_level.is_some() {
opts.logging.level = top_level_opts.log_level;
if cli_options.log_level.is_some() {
opts.logging.level = cli_options.log_level.clone();
}

let tls_opts = TlsOption::new(
@@ -196,16 +223,60 @@ impl StartCommand {
logging::info!("Frontend start command: {:#?}", self);
logging::info!("Frontend options: {:#?}", opts);

let mut instance = FeInstance::try_new_distributed(&opts, plugins.clone())
set_default_timezone(opts.default_timezone.as_deref()).context(InitTimezoneSnafu)?;

let meta_client_options = opts.meta_client.as_ref().context(MissingConfigSnafu {
msg: "'meta_client'",
})?;

let cache_max_capacity = meta_client_options.metadata_cache_max_capacity;
let cache_ttl = meta_client_options.metadata_cache_ttl;
let cache_tti = meta_client_options.metadata_cache_tti;

let meta_client = FeInstance::create_meta_client(meta_client_options)
.await
.context(StartFrontendSnafu)?;

let cached_meta_backend = CachedMetaKvBackendBuilder::new(meta_client.clone())
.cache_max_capacity(cache_max_capacity)
.cache_ttl(cache_ttl)
.cache_tti(cache_tti)
.build();
let cached_meta_backend = Arc::new(cached_meta_backend);

let executor = HandlerGroupExecutor::new(vec![
Arc::new(ParseMailboxMessageHandler),
Arc::new(InvalidateTableCacheHandler::new(
cached_meta_backend.clone(),
)),
]);

let heartbeat_task = HeartbeatTask::new(
meta_client.clone(),
opts.heartbeat.clone(),
Arc::new(executor),
);

let mut instance = FrontendBuilder::new(
cached_meta_backend.clone(),
Arc::new(DatanodeClients::default()),
meta_client,
)
.with_cache_invalidator(cached_meta_backend)
.with_plugin(plugins.clone())
.with_heartbeat_task(heartbeat_task)
.try_build()
.await
.context(StartFrontendSnafu)?;

let servers = Services::new(opts.clone(), Arc::new(instance.clone()), plugins)
.build()
.context(StartFrontendSnafu)?;
instance
.build_servers(opts)
.await
.build_servers(opts, servers)
.context(StartFrontendSnafu)?;

Ok(Instance { frontend: instance })
Ok(Instance::new(instance))
}
}

@@ -221,7 +292,7 @@ mod tests {
use servers::http::HttpOptions;

use super::*;
use crate::options::ENV_VAR_SEP;
use crate::options::{CliOptions, ENV_VAR_SEP};

#[test]
fn test_try_from_start_command() {
@@ -235,8 +306,7 @@ mod tests {
..Default::default()
};

let Options::Frontend(opts) = command.load_options(TopLevelOptions::default()).unwrap()
else {
let Options::Frontend(opts) = command.load_options(&CliOptions::default()).unwrap() else {
unreachable!()
};

@@ -288,7 +358,7 @@ mod tests {
..Default::default()
};

let Options::Frontend(fe_opts) = command.load_options(TopLevelOptions::default()).unwrap()
let Options::Frontend(fe_opts) = command.load_options(&CliOptions::default()).unwrap()
else {
unreachable!()
};
@@ -327,16 +397,19 @@ mod tests {
}

#[test]
fn test_top_level_options() {
fn test_load_log_options_from_cli() {
let cmd = StartCommand {
disable_dashboard: Some(false),
..Default::default()
};

let options = cmd
.load_options(TopLevelOptions {
.load_options(&CliOptions {
log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
log_level: Some("debug".to_string()),

#[cfg(feature = "tokio-console")]
tokio_console_addr: None,
})
.unwrap();

@@ -416,11 +489,8 @@ mod tests {
..Default::default()
};

let top_level_opts = TopLevelOptions {
log_dir: None,
log_level: Some("error".to_string()),
};
let Options::Frontend(fe_opts) = command.load_options(top_level_opts).unwrap()
let Options::Frontend(fe_opts) =
command.load_options(&CliOptions::default()).unwrap()
else {
unreachable!()
};
@@ -12,7 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#![feature(assert_matches)]
#![feature(assert_matches, let_chains)]

use async_trait::async_trait;
use clap::arg;
use common_telemetry::{error, info};

pub mod cli;
pub mod datanode;
@@ -21,3 +25,107 @@ pub mod frontend;
pub mod metasrv;
pub mod options;
pub mod standalone;

lazy_static::lazy_static! {
static ref APP_VERSION: prometheus::IntGaugeVec =
prometheus::register_int_gauge_vec!("greptime_app_version", "app version", &["short_version", "version"]).unwrap();
}

#[async_trait]
pub trait App {
fn name(&self) -> &str;

/// A hook for implementor to make something happened before actual startup. Defaults to no-op.
fn pre_start(&mut self) -> error::Result<()> {
Ok(())
}

async fn start(&mut self) -> error::Result<()>;

async fn stop(&self) -> error::Result<()>;
}

pub async fn start_app(mut app: Box<dyn App>) -> error::Result<()> {
let name = app.name().to_string();

app.pre_start()?;

tokio::select! {
result = app.start() => {
if let Err(err) = result {
error!(err; "Failed to start app {name}!");
}
}
_ = tokio::signal::ctrl_c() => {
if let Err(err) = app.stop().await {
error!(err; "Failed to stop app {name}!");
}
info!("Goodbye!");
}
}

Ok(())
}

pub fn log_versions() {
// Report app version as gauge.
APP_VERSION
.with_label_values(&[short_version(), full_version()])
.inc();

// Log version and argument flags.
info!(
"short_version: {}, full_version: {}",
short_version(),
full_version()
);

log_env_flags();
}

pub fn greptimedb_cli() -> clap::Command {
let cmd = clap::Command::new("greptimedb")
.version(print_version())
.subcommand_required(true);

#[cfg(feature = "tokio-console")]
let cmd = cmd.arg(arg!(--"tokio-console-addr"[TOKIO_CONSOLE_ADDR]));

cmd.args([arg!(--"log-dir"[LOG_DIR]), arg!(--"log-level"[LOG_LEVEL])])
}

fn print_version() -> &'static str {
concat!(
"\nbranch: ",
env!("GIT_BRANCH"),
"\ncommit: ",
env!("GIT_COMMIT"),
"\ndirty: ",
env!("GIT_DIRTY"),
"\nversion: ",
env!("CARGO_PKG_VERSION")
)
}

fn short_version() -> &'static str {
env!("CARGO_PKG_VERSION")
}

// {app_name}-{branch_name}-{commit_short}
// The branch name (tag) of a release build should already contain the short
// version so the full version doesn't concat the short version explicitly.
fn full_version() -> &'static str {
concat!(
"greptimedb-",
env!("GIT_BRANCH"),
"-",
env!("GIT_COMMIT_SHORT")
)
}

fn log_env_flags() {
info!("command line arguments");
for argument in std::env::args() {
info!("argument: {}", argument);
}
}
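The new App trait plus start_app above replaces the old per-subcommand Application enum: each binary mode now only supplies name/start/stop and hands a boxed instance to start_app, which drives startup and ctrl-c shutdown. A minimal illustrative implementor follows; MyService is hypothetical and not part of this change:

// Sketch only: what a hypothetical App implementor has to provide.
struct MyService;

#[async_trait::async_trait]
impl App for MyService {
    fn name(&self) -> &str {
        "my-service"
    }

    async fn start(&mut self) -> error::Result<()> {
        // spin up servers / background tasks here
        Ok(())
    }

    async fn stop(&self) -> error::Result<()> {
        // flush state and shut down gracefully here
        Ok(())
    }
}

// Then, from a main function: start_app(Box::new(MyService)).await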
@@ -14,6 +14,7 @@
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use clap::Parser;
|
||||
use common_telemetry::logging;
|
||||
use meta_srv::bootstrap::MetaSrvInstance;
|
||||
@@ -21,21 +22,34 @@ use meta_srv::metasrv::MetaSrvOptions;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result, StartMetaServerSnafu};
|
||||
use crate::options::{Options, TopLevelOptions};
|
||||
use crate::options::{CliOptions, Options};
|
||||
use crate::App;
|
||||
|
||||
pub struct Instance {
|
||||
instance: MetaSrvInstance,
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
pub async fn start(&mut self) -> Result<()> {
|
||||
fn new(instance: MetaSrvInstance) -> Self {
|
||||
Self { instance }
|
||||
}
|
||||
}
#[async_trait]
impl App for Instance {
    fn name(&self) -> &str {
        "greptime-metasrv"
    }

    async fn start(&mut self) -> Result<()> {
        plugins::start_meta_srv_plugins(self.instance.plugins())
            .await
            .context(StartMetaServerSnafu)?;

        self.instance.start().await.context(StartMetaServerSnafu)
    }

    pub async fn stop(&self) -> Result<()> {
    async fn stop(&self) -> Result<()> {
        self.instance
            .shutdown()
            .await
@@ -54,8 +68,8 @@ impl Command {
        self.subcmd.build(opts).await
    }

    pub fn load_options(&self, top_level_opts: TopLevelOptions) -> Result<Options> {
        self.subcmd.load_options(top_level_opts)
    pub fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
        self.subcmd.load_options(cli_options)
    }
}

@@ -71,9 +85,9 @@ impl SubCommand {
        }
    }

    fn load_options(&self, top_level_opts: TopLevelOptions) -> Result<Options> {
    fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
        match self {
            SubCommand::Start(cmd) => cmd.load_options(top_level_opts),
            SubCommand::Start(cmd) => cmd.load_options(cli_options),
        }
    }
}
@@ -100,22 +114,29 @@ struct StartCommand {
    http_timeout: Option<u64>,
    #[clap(long, default_value = "GREPTIMEDB_METASRV")]
    env_prefix: String,
    /// The working home directory of this metasrv instance.
    #[clap(long)]
    data_home: Option<String>,

    /// If it's not empty, the metasrv will store all data with this key prefix.
    #[clap(long, default_value = "")]
    store_key_prefix: String,
}

impl StartCommand {
    fn load_options(&self, top_level_opts: TopLevelOptions) -> Result<Options> {
    fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
        let mut opts: MetaSrvOptions = Options::load_layered_options(
            self.config_file.as_deref(),
            self.env_prefix.as_ref(),
            None,
            MetaSrvOptions::env_list_keys(),
        )?;

        if let Some(dir) = top_level_opts.log_dir {
            opts.logging.dir = dir;
        if let Some(dir) = &cli_options.log_dir {
            opts.logging.dir = dir.clone();
        }

        if top_level_opts.log_level.is_some() {
            opts.logging.level = top_level_opts.log_level;
        if cli_options.log_level.is_some() {
            opts.logging.level = cli_options.log_level.clone();
        }

        if let Some(addr) = &self.bind_addr {
@@ -152,6 +173,14 @@ impl StartCommand {
            opts.http.timeout = Duration::from_secs(http_timeout);
        }

        if let Some(data_home) = &self.data_home {
            opts.data_home = data_home.clone();
        }

        if !self.store_key_prefix.is_empty() {
            opts.store_key_prefix = self.store_key_prefix.clone()
        }

        // Disable dashboard in metasrv.
        opts.http.disable_dashboard = true;

@@ -166,11 +195,16 @@ impl StartCommand {
        logging::info!("MetaSrv start command: {:#?}", self);
        logging::info!("MetaSrv options: {:#?}", opts);

        let instance = MetaSrvInstance::new(opts, plugins)
        let builder = meta_srv::bootstrap::metasrv_builder(&opts, plugins.clone(), None)
            .await
            .context(error::BuildMetaServerSnafu)?;
        let metasrv = builder.build().await.context(error::BuildMetaServerSnafu)?;

        let instance = MetaSrvInstance::new(opts, plugins, metasrv)
            .await
            .context(error::BuildMetaServerSnafu)?;

        Ok(Instance { instance })
        Ok(Instance::new(instance))
    }
}
@@ -194,8 +228,7 @@ mod tests {
            ..Default::default()
        };

        let Options::Metasrv(options) = cmd.load_options(TopLevelOptions::default()).unwrap()
        else {
        let Options::Metasrv(options) = cmd.load_options(&CliOptions::default()).unwrap() else {
            unreachable!()
        };
        assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
@@ -230,8 +263,7 @@ mod tests {
            ..Default::default()
        };

        let Options::Metasrv(options) = cmd.load_options(TopLevelOptions::default()).unwrap()
        else {
        let Options::Metasrv(options) = cmd.load_options(&CliOptions::default()).unwrap() else {
            unreachable!()
        };
        assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
@@ -262,7 +294,7 @@ mod tests {
    }

    #[test]
    fn test_top_level_options() {
    fn test_load_log_options_from_cli() {
        let cmd = StartCommand {
            bind_addr: Some("127.0.0.1:3002".to_string()),
            server_addr: Some("127.0.0.1:3002".to_string()),
@@ -272,9 +304,12 @@ mod tests {
        };

        let options = cmd
            .load_options(TopLevelOptions {
            .load_options(&CliOptions {
                log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
                log_level: Some("debug".to_string()),

                #[cfg(feature = "tokio-console")]
                tokio_console_addr: None,
            })
            .unwrap();

@@ -333,8 +368,7 @@ mod tests {
            ..Default::default()
        };

        let Options::Metasrv(opts) =
            command.load_options(TopLevelOptions::default()).unwrap()
        let Options::Metasrv(opts) = command.load_options(&CliOptions::default()).unwrap()
        else {
            unreachable!()
        };
@@ -12,8 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use clap::ArgMatches;
use common_config::KvBackendConfig;
use common_telemetry::logging::LoggingOptions;
use common_telemetry::logging::{LoggingOptions, TracingOptions};
use common_wal::config::MetaSrvWalConfig;
use config::{Config, Environment, File, FileFormat};
use datanode::config::{DatanodeOptions, ProcedureConfig};
use frontend::error::{Result as FeResult, TomlFormatSnafu};
@@ -28,7 +30,7 @@ pub const ENV_VAR_SEP: &str = "__";
pub const ENV_LIST_SEP: &str = ",";

/// Options mixed up from datanode, frontend and metasrv.
#[derive(Serialize)]
#[derive(Serialize, Debug, Clone)]
pub struct MixOptions {
    pub data_home: String,
    pub procedure: ProcedureConfig,
@@ -36,6 +38,7 @@ pub struct MixOptions {
    pub frontend: FrontendOptions,
    pub datanode: DatanodeOptions,
    pub logging: LoggingOptions,
    pub wal_meta: MetaSrvWalConfig,
}

impl From<MixOptions> for FrontendOptions {
@@ -58,10 +61,32 @@ pub enum Options {
    Cli(Box<LoggingOptions>),
}

#[derive(Clone, Debug, Default)]
pub struct TopLevelOptions {
#[derive(Default)]
pub struct CliOptions {
    pub log_dir: Option<String>,
    pub log_level: Option<String>,

    #[cfg(feature = "tokio-console")]
    pub tokio_console_addr: Option<String>,
}

impl CliOptions {
    pub fn new(args: &ArgMatches) -> Self {
        Self {
            log_dir: args.get_one::<String>("log-dir").cloned(),
            log_level: args.get_one::<String>("log-level").cloned(),

            #[cfg(feature = "tokio-console")]
            tokio_console_addr: args.get_one::<String>("tokio-console-addr").cloned(),
        }
    }

    pub fn tracing_options(&self) -> TracingOptions {
        TracingOptions {
            #[cfg(feature = "tokio-console")]
            tokio_console_addr: self.tokio_console_addr.clone(),
        }
    }
}
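// --- Illustrative sketch (not part of this diff) ----------------------------
// Building a CliOptions from the matches of the top-level command. The flag
// ids mirror the ones declared by greptimedb_cli() ("log-dir", "log-level",
// and "tokio-console-addr" behind the "tokio-console" feature); the command
// construction below is a stand-in, not the real entry point.
#[cfg(test)]
mod cli_options_sketch {
    use clap::arg;

    use super::CliOptions;

    #[test]
    fn from_arg_matches() {
        let cmd = clap::Command::new("greptimedb")
            .args([arg!(--"log-dir"[LOG_DIR]), arg!(--"log-level"[LOG_LEVEL])]);
        #[cfg(feature = "tokio-console")]
        let cmd = cmd.arg(arg!(--"tokio-console-addr"[TOKIO_CONSOLE_ADDR]));

        let matches = cmd.get_matches_from(["greptimedb", "--log-dir", "/tmp/greptimedb/logs"]);
        let cli_options = CliOptions::new(&matches);

        assert_eq!(cli_options.log_dir.as_deref(), Some("/tmp/greptimedb/logs"));
        assert_eq!(cli_options.log_level, None);
    }
}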
impl Options {
@@ -133,14 +158,23 @@ impl Options {

        Ok(opts)
    }

    pub fn node_id(&self) -> Option<String> {
        match self {
            Options::Metasrv(_) | Options::Cli(_) => None,
            Options::Datanode(opt) => opt.node_id.map(|x| x.to_string()),
            Options::Frontend(opt) => opt.node_id.clone(),
            Options::Standalone(opt) => opt.frontend.node_id.clone(),
        }
    }
}
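// Illustrative note (not part of this diff): load_layered_options merges, in
// increasing priority, defaults, the TOML config file, and environment
// variables. Env keys are the nested config path joined with ENV_VAR_SEP
// ("__") under the command's env prefix, e.g. (prefix elided here)
//
//     <ENV_PREFIX>__STORAGE__MANIFEST__CHECKPOINT_MARGIN=99
//
// overrides storage.manifest.checkpoint_margin from the file, and list-valued
// keys such as wal.broker_endpoints are split on ENV_LIST_SEP (",").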
#[cfg(test)]
mod tests {
    use std::io::Write;
    use std::time::Duration;

    use common_test_util::temp_dir::create_named_temp_file;
    use common_wal::config::DatanodeWalConfig;
    use datanode::config::{DatanodeOptions, ObjectStoreConfig};

    use super::*;
@@ -163,6 +197,7 @@ mod tests {
        tcp_nodelay = true

        [wal]
        provider = "raft_engine"
        dir = "/tmp/greptimedb/wal"
        file_size = "1GB"
        purge_threshold = "50GB"
@@ -170,11 +205,6 @@ mod tests {
        read_batch_size = 128
        sync_write = false

        [storage.compaction]
        max_inflight_tasks = 3
        max_files_in_level0 = 7
        max_purge_tasks = 32

        [logging]
        level = "debug"
        dir = "/tmp/greptimedb/test/logs"
@@ -185,17 +215,6 @@ mod tests {
        temp_env::with_vars(
            // The following environment variables will be used to override the values in the config file.
            [
                (
                    // storage.manifest.checkpoint_margin = 99
                    [
                        env_prefix.to_string(),
                        "storage".to_uppercase(),
                        "manifest".to_uppercase(),
                        "checkpoint_margin".to_uppercase(),
                    ]
                    .join(ENV_VAR_SEP),
                    Some("99"),
                ),
                (
                    // storage.type = S3
                    [
@@ -216,17 +235,6 @@ mod tests {
                    .join(ENV_VAR_SEP),
                    Some("mybucket"),
                ),
                (
                    // storage.manifest.gc_duration = 42s
                    [
                        env_prefix.to_string(),
                        "storage".to_uppercase(),
                        "manifest".to_uppercase(),
                        "gc_duration".to_uppercase(),
                    ]
                    .join(ENV_VAR_SEP),
                    Some("42s"),
                ),
                (
                    // wal.dir = /other/wal/dir
                    [
@@ -257,17 +265,12 @@ mod tests {
                .unwrap();

                // Check the configs from environment variables.
                assert_eq!(opts.storage.manifest.checkpoint_margin, Some(99));
                match opts.storage.store {
                match &opts.storage.store {
                    ObjectStoreConfig::S3(s3_config) => {
                        assert_eq!(s3_config.bucket, "mybucket".to_string());
                    }
                    _ => panic!("unexpected store type"),
                }
                assert_eq!(
                    opts.storage.manifest.gc_duration,
                    Some(Duration::from_secs(42))
                );
                assert_eq!(
                    opts.meta_client.unwrap().metasrv_addrs,
                    vec![
@@ -278,7 +281,10 @@ mod tests {
                );

                // Should be the values from config file, not environment variables.
                assert_eq!(opts.wal.dir.unwrap(), "/tmp/greptimedb/wal");
                let DatanodeWalConfig::RaftEngine(raft_engine_config) = opts.wal else {
                    unreachable!()
                };
                assert_eq!(raft_engine_config.dir.unwrap(), "/tmp/greptimedb/wal");

                // Should be default values.
                assert_eq!(opts.node_id, None);
@@ -15,38 +15,50 @@
use std::sync::Arc;
use std::{fs, path};

use catalog::kvbackend::KvBackendCatalogManager;
use catalog::CatalogManagerRef;
use async_trait::async_trait;
use clap::Parser;
use common_base::Plugins;
use common_config::{metadata_store_dir, KvBackendConfig, WalConfig};
use common_meta::cache_invalidator::DummyKvCacheInvalidator;
use common_catalog::consts::MIN_USER_TABLE_ID;
use common_config::{metadata_store_dir, KvBackendConfig};
use common_meta::cache_invalidator::DummyCacheInvalidator;
use common_meta::datanode_manager::DatanodeManagerRef;
use common_meta::ddl::table_meta::TableMetadataAllocator;
use common_meta::ddl::DdlTaskExecutorRef;
use common_meta::ddl_manager::DdlManager;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
use common_meta::region_keeper::MemoryRegionKeeper;
use common_meta::sequence::SequenceBuilder;
use common_meta::wal_options_allocator::{WalOptionsAllocator, WalOptionsAllocatorRef};
use common_procedure::ProcedureManagerRef;
use common_telemetry::info;
use common_telemetry::logging::LoggingOptions;
use common_time::timezone::set_default_timezone;
use common_wal::config::StandaloneWalConfig;
use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, StorageConfig};
use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig;
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
use frontend::server::Services;
use frontend::service_config::{
    GrpcOptions, InfluxdbOptions, MysqlOptions, OpentsdbOptions, PostgresOptions, PromStoreOptions,
};
use mito2::config::MitoConfig;
use serde::{Deserialize, Serialize};
use servers::export_metrics::ExportMetricsOption;
use servers::http::HttpOptions;
use servers::tls::{TlsMode, TlsOption};
use servers::Mode;
use snafu::ResultExt;

use crate::error::{
    CreateDirSnafu, IllegalConfigSnafu, InitMetadataSnafu, Result, ShutdownDatanodeSnafu,
    ShutdownFrontendSnafu, StartDatanodeSnafu, StartFrontendSnafu, StartProcedureManagerSnafu,
    StopProcedureManagerSnafu,
    CreateDirSnafu, IllegalConfigSnafu, InitDdlManagerSnafu, InitMetadataSnafu, InitTimezoneSnafu,
    Result, ShutdownDatanodeSnafu, ShutdownFrontendSnafu, StartDatanodeSnafu, StartFrontendSnafu,
    StartProcedureManagerSnafu, StartWalOptionsAllocatorSnafu, StopProcedureManagerSnafu,
};
use crate::options::{MixOptions, Options, TopLevelOptions};
use crate::options::{CliOptions, MixOptions, Options};
use crate::App;

#[derive(Parser)]
pub struct Command {
@@ -59,8 +71,8 @@ impl Command {
        self.subcmd.build(opts).await
    }

    pub fn load_options(&self, top_level_options: TopLevelOptions) -> Result<Options> {
        self.subcmd.load_options(top_level_options)
    pub fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
        self.subcmd.load_options(cli_options)
    }
}

@@ -76,9 +88,9 @@ impl SubCommand {
        }
    }

    fn load_options(&self, top_level_options: TopLevelOptions) -> Result<Options> {
    fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
        match self {
            SubCommand::Start(cmd) => cmd.load_options(top_level_options),
            SubCommand::Start(cmd) => cmd.load_options(cli_options),
        }
    }
}
@@ -88,6 +100,7 @@ impl SubCommand {
pub struct StandaloneOptions {
    pub mode: Mode,
    pub enable_telemetry: bool,
    pub default_timezone: Option<String>,
    pub http: HttpOptions,
    pub grpc: GrpcOptions,
    pub mysql: MysqlOptions,
@@ -95,7 +108,7 @@ pub struct StandaloneOptions {
    pub opentsdb: OpentsdbOptions,
    pub influxdb: InfluxdbOptions,
    pub prom_store: PromStoreOptions,
    pub wal: WalConfig,
    pub wal: StandaloneWalConfig,
    pub storage: StorageConfig,
    pub metadata_store: KvBackendConfig,
    pub procedure: ProcedureConfig,
@@ -103,6 +116,13 @@ pub struct StandaloneOptions {
    pub user_provider: Option<String>,
    /// Options for different store engines.
    pub region_engine: Vec<RegionEngineConfig>,
    pub export_metrics: ExportMetricsOption,
}

impl StandaloneOptions {
    pub fn env_list_keys() -> Option<&'static [&'static str]> {
        Some(&["wal.broker_endpoints"])
    }
}

impl Default for StandaloneOptions {
@@ -110,6 +130,7 @@ impl Default for StandaloneOptions {
        Self {
            mode: Mode::Standalone,
            enable_telemetry: true,
            default_timezone: None,
            http: HttpOptions::default(),
            grpc: GrpcOptions::default(),
            mysql: MysqlOptions::default(),
@@ -117,11 +138,12 @@ impl Default for StandaloneOptions {
            opentsdb: OpentsdbOptions::default(),
            influxdb: InfluxdbOptions::default(),
            prom_store: PromStoreOptions::default(),
            wal: WalConfig::default(),
            wal: StandaloneWalConfig::default(),
            storage: StorageConfig::default(),
            metadata_store: KvBackendConfig::default(),
            procedure: ProcedureConfig::default(),
            logging: LoggingOptions::default(),
            export_metrics: ExportMetricsOption::default(),
            user_provider: None,
            region_engine: vec![
                RegionEngineConfig::Mito(MitoConfig::default()),
@@ -135,6 +157,7 @@ impl StandaloneOptions {
    fn frontend_options(self) -> FrontendOptions {
        FrontendOptions {
            mode: self.mode,
            default_timezone: self.default_timezone,
            http: self.http,
            grpc: self.grpc,
            mysql: self.mysql,
@@ -145,6 +168,8 @@ impl StandaloneOptions {
            meta_client: None,
            logging: self.logging,
            user_provider: self.user_provider,
            // Handle the export metrics task run by standalone to frontend for execution
            export_metrics: self.export_metrics,
            ..Default::default()
        }
    }
@@ -153,9 +178,10 @@ impl StandaloneOptions {
        DatanodeOptions {
            node_id: Some(0),
            enable_telemetry: self.enable_telemetry,
            wal: self.wal,
            wal: self.wal.into(),
            storage: self.storage,
            region_engine: self.region_engine,
            rpc_addr: self.grpc.addr,
            ..Default::default()
        }
    }
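// --- Illustrative sketch (not part of this diff) ----------------------------
// The same StandaloneOptions value is fanned out into the per-component
// option structs, so one standalone config drives both the embedded frontend
// and the embedded datanode. This helper is hypothetical; the real split
// happens inside StartCommand::load_options/build.
#[allow(dead_code)]
fn split_standalone_options(opts: StandaloneOptions) -> (FrontendOptions, DatanodeOptions) {
    // frontend_options consumes the value, hence the clone.
    let fe_opts = opts.clone().frontend_options();
    let dn_opts = opts.datanode_options();
    debug_assert_eq!(dn_opts.node_id, Some(0)); // standalone always runs datanode 0
    (fe_opts, dn_opts)
}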
@@ -165,24 +191,37 @@ pub struct Instance {
    datanode: Datanode,
    frontend: FeInstance,
    procedure_manager: ProcedureManagerRef,
    wal_options_allocator: WalOptionsAllocatorRef,
}

impl Instance {
    pub async fn start(&mut self) -> Result<()> {
        // Start datanode instance before starting services, to avoid requests come in before internal components are started.
        self.datanode.start().await.context(StartDatanodeSnafu)?;
        info!("Datanode instance started");
#[async_trait]
impl App for Instance {
    fn name(&self) -> &str {
        "greptime-standalone"
    }

    async fn start(&mut self) -> Result<()> {
        self.datanode.start_telemetry();

        self.procedure_manager
            .start()
            .await
            .context(StartProcedureManagerSnafu)?;

        self.wal_options_allocator
            .start()
            .await
            .context(StartWalOptionsAllocatorSnafu)?;

        plugins::start_frontend_plugins(self.frontend.plugins().clone())
            .await
            .context(StartFrontendSnafu)?;

        self.frontend.start().await.context(StartFrontendSnafu)?;
        Ok(())
    }

    pub async fn stop(&self) -> Result<()> {
    async fn stop(&self) -> Result<()> {
        self.frontend
            .shutdown()
            .await
@@ -204,7 +243,7 @@ impl Instance {
}

#[derive(Debug, Default, Parser)]
struct StartCommand {
pub struct StartCommand {
    #[clap(long)]
    http_addr: Option<String>,
    #[clap(long)]
@@ -218,7 +257,7 @@ struct StartCommand {
    #[clap(short, long)]
    influxdb_enable: bool,
    #[clap(short, long)]
    config_file: Option<String>,
    pub config_file: Option<String>,
    #[clap(long)]
    tls_mode: Option<TlsMode>,
    #[clap(long)]
@@ -228,25 +267,36 @@ struct StartCommand {
    #[clap(long)]
    user_provider: Option<String>,
    #[clap(long, default_value = "GREPTIMEDB_STANDALONE")]
    env_prefix: String,
    pub env_prefix: String,
    /// The working home directory of this standalone instance.
    #[clap(long)]
    data_home: Option<String>,
}

impl StartCommand {
    fn load_options(&self, top_level_options: TopLevelOptions) -> Result<Options> {
        let mut opts: StandaloneOptions = Options::load_layered_options(
    fn load_options(&self, cli_options: &CliOptions) -> Result<Options> {
        let opts: StandaloneOptions = Options::load_layered_options(
            self.config_file.as_deref(),
            self.env_prefix.as_ref(),
            None,
            StandaloneOptions::env_list_keys(),
        )?;

        self.convert_options(cli_options, opts)
    }

    pub fn convert_options(
        &self,
        cli_options: &CliOptions,
        mut opts: StandaloneOptions,
    ) -> Result<Options> {
        opts.mode = Mode::Standalone;

        if let Some(dir) = top_level_options.log_dir {
            opts.logging.dir = dir;
        if let Some(dir) = &cli_options.log_dir {
            opts.logging.dir = dir.clone();
        }

        if top_level_options.log_level.is_some() {
            opts.logging.level = top_level_options.log_level;
        if cli_options.log_level.is_some() {
            opts.logging.level = cli_options.log_level.clone();
        }

        let tls_opts = TlsOption::new(
@@ -259,6 +309,10 @@ impl StartCommand {
            opts.http.addr = addr.clone()
        }

        if let Some(data_home) = &self.data_home {
            opts.storage.data_home = data_home.clone();
        }

        if let Some(addr) = &self.rpc_addr {
            // frontend grpc addr conflict with datanode default grpc addr
            let datanode_grpc_addr = DatanodeOptions::default().rpc_addr;
@@ -300,7 +354,8 @@ impl StartCommand {
        let procedure = opts.procedure.clone();
        let frontend = opts.clone().frontend_options();
        let logging = opts.logging.clone();
        let datanode = opts.datanode_options();
        let wal_meta = opts.wal.clone().into();
        let datanode = opts.datanode_options().clone();

        Ok(Options::Standalone(Box::new(MixOptions {
            procedure,
@@ -309,6 +364,7 @@ impl StartCommand {
            frontend,
            datanode,
            logging,
            wal_meta,
        })))
    }

@@ -316,19 +372,18 @@ impl StartCommand {
    #[allow(unused_variables)]
    #[allow(clippy::diverging_sub_expression)]
    async fn build(self, opts: MixOptions) -> Result<Instance> {
        let mut fe_opts = opts.frontend.clone();
        info!("Standalone start command: {:#?}", self);
        info!("Building standalone instance with {opts:#?}");

        let mut fe_opts = opts.frontend;
        #[allow(clippy::unnecessary_mut_passed)]
        let fe_plugins = plugins::setup_frontend_plugins(&mut fe_opts) // mut ref is MUST, DO NOT change it
            .await
            .context(StartFrontendSnafu)?;

        let dn_opts = opts.datanode.clone();
        let dn_opts = opts.datanode;

        info!("Standalone start command: {:#?}", self);
        info!(
            "Standalone frontend options: {:#?}, datanode options: {:#?}",
            fe_opts, dn_opts
        );
        set_default_timezone(fe_opts.default_timezone.as_deref()).context(InitTimezoneSnafu)?;

        // Ensure the data_home directory exists.
        fs::create_dir_all(path::Path::new(&opts.data_home)).context(CreateDirSnafu {
@@ -344,69 +399,94 @@ impl StartCommand {
            .await
            .context(StartFrontendSnafu)?;

        let datanode = DatanodeBuilder::new(
            dn_opts.clone(),
            Some(kv_backend.clone()),
            Default::default(),
        )
        .build()
        .await
        .context(StartDatanodeSnafu)?;
        let region_server = datanode.region_server();
        let builder =
            DatanodeBuilder::new(dn_opts, fe_plugins.clone()).with_kv_backend(kv_backend.clone());
        let datanode = builder.build().await.context(StartDatanodeSnafu)?;

        let catalog_manager = KvBackendCatalogManager::new(
        let datanode_manager = Arc::new(StandaloneDatanodeManager(datanode.region_server()));

        let table_id_sequence = Arc::new(
            SequenceBuilder::new("table_id", kv_backend.clone())
                .initial(MIN_USER_TABLE_ID as u64)
                .step(10)
                .build(),
        );
        let wal_options_allocator = Arc::new(WalOptionsAllocator::new(
            opts.wal_meta.clone(),
            kv_backend.clone(),
            Arc::new(DummyKvCacheInvalidator),
            Arc::new(StandaloneDatanodeManager(region_server.clone())),
        ));

        let table_metadata_manager =
            Self::create_table_metadata_manager(kv_backend.clone()).await?;

        let table_meta_allocator = TableMetadataAllocator::new(
            table_id_sequence,
            wal_options_allocator.clone(),
            table_metadata_manager.clone(),
        );

        catalog_manager
            .table_metadata_manager_ref()
            .init()
            .await
            .context(InitMetadataSnafu)?;

        // TODO: build frontend instance like in distributed mode
        let mut frontend = build_frontend(
            fe_plugins,
            kv_backend,
        let ddl_task_executor = Self::create_ddl_task_executor(
            table_metadata_manager,
            procedure_manager.clone(),
            catalog_manager,
            region_server,
            datanode_manager.clone(),
            table_meta_allocator,
        )
        .await?;

        frontend
            .build_servers(opts)
        let mut frontend = FrontendBuilder::new(kv_backend, datanode_manager, ddl_task_executor)
            .with_plugin(fe_plugins.clone())
            .try_build()
            .await
            .context(StartFrontendSnafu)?;

        let servers = Services::new(fe_opts.clone(), Arc::new(frontend.clone()), fe_plugins)
            .build()
            .context(StartFrontendSnafu)?;
        frontend
            .build_servers(fe_opts, servers)
            .context(StartFrontendSnafu)?;

        Ok(Instance {
            datanode,
            frontend,
            procedure_manager,
            wal_options_allocator,
        })
    }
}

/// Build frontend instance in standalone mode
async fn build_frontend(
    plugins: Plugins,
    kv_backend: KvBackendRef,
    procedure_manager: ProcedureManagerRef,
    catalog_manager: CatalogManagerRef,
    region_server: RegionServer,
) -> Result<FeInstance> {
    let frontend_instance = FeInstance::try_new_standalone(
        kv_backend,
        procedure_manager,
        catalog_manager,
        plugins,
        region_server,
    )
    .await
    .context(StartFrontendSnafu)?;
    Ok(frontend_instance)
    pub async fn create_ddl_task_executor(
        table_metadata_manager: TableMetadataManagerRef,
        procedure_manager: ProcedureManagerRef,
        datanode_manager: DatanodeManagerRef,
        table_meta_allocator: TableMetadataAllocator,
    ) -> Result<DdlTaskExecutorRef> {
        let ddl_task_executor: DdlTaskExecutorRef = Arc::new(
            DdlManager::try_new(
                procedure_manager,
                datanode_manager,
                Arc::new(DummyCacheInvalidator),
                table_metadata_manager,
                table_meta_allocator,
                Arc::new(MemoryRegionKeeper::default()),
            )
            .context(InitDdlManagerSnafu)?,
        );

        Ok(ddl_task_executor)
    }

    pub async fn create_table_metadata_manager(
        kv_backend: KvBackendRef,
    ) -> Result<TableMetadataManagerRef> {
        let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend));

        table_metadata_manager
            .init()
            .await
            .context(InitMetadataSnafu)?;

        Ok(table_metadata_manager)
    }
}
#[cfg(test)]
@@ -418,10 +498,12 @@ mod tests {
    use auth::{Identity, Password, UserProviderRef};
    use common_base::readable_size::ReadableSize;
    use common_test_util::temp_dir::create_named_temp_file;
    use common_wal::config::DatanodeWalConfig;
    use datanode::config::{FileConfig, GcsConfig};
    use servers::Mode;

    use super::*;
    use crate::options::ENV_VAR_SEP;
    use crate::options::{CliOptions, ENV_VAR_SEP};

    #[tokio::test]
    async fn test_try_from_start_command_to_anymap() {
@@ -459,6 +541,7 @@ mod tests {
        enable_memory_catalog = true

        [wal]
        provider = "raft_engine"
        dir = "/tmp/greptimedb/test/wal"
        file_size = "1GB"
        purge_threshold = "50GB"
@@ -467,6 +550,15 @@ mod tests {
        sync_write = false

        [storage]
        data_home = "/tmp/greptimedb/"
        type = "File"

        [[storage.providers]]
        type = "Gcs"
        bucket = "foo"
        endpoint = "bar"

        [[storage.providers]]
        type = "S3"
        access_key_id = "access_key_id"
        secret_access_key = "secret_access_key"
@@ -496,8 +588,7 @@ mod tests {
            ..Default::default()
        };

        let Options::Standalone(options) = cmd.load_options(TopLevelOptions::default()).unwrap()
        else {
        let Options::Standalone(options) = cmd.load_options(&CliOptions::default()).unwrap() else {
            unreachable!()
        };
        let fe_opts = options.frontend;
@@ -514,9 +605,21 @@ mod tests {
        assert_eq!(None, fe_opts.mysql.reject_no_database);
        assert!(fe_opts.influxdb.enable);

        assert_eq!("/tmp/greptimedb/test/wal", dn_opts.wal.dir.unwrap());
        let DatanodeWalConfig::RaftEngine(raft_engine_config) = dn_opts.wal else {
            unreachable!()
        };
        assert_eq!("/tmp/greptimedb/test/wal", raft_engine_config.dir.unwrap());

        match &dn_opts.storage.store {
        assert!(matches!(
            &dn_opts.storage.store,
            datanode::config::ObjectStoreConfig::File(FileConfig { .. })
        ));
        assert_eq!(dn_opts.storage.providers.len(), 2);
        assert!(matches!(
            dn_opts.storage.providers[0],
            datanode::config::ObjectStoreConfig::Gcs(GcsConfig { .. })
        ));
        match &dn_opts.storage.providers[1] {
            datanode::config::ObjectStoreConfig::S3(s3_config) => {
                assert_eq!(
                    "Secret([REDACTED alloc::string::String])".to_string(),
@@ -533,16 +636,19 @@ mod tests {
    }

    #[test]
    fn test_top_level_options() {
    fn test_load_log_options_from_cli() {
        let cmd = StartCommand {
            user_provider: Some("static_user_provider:cmd:test=test".to_string()),
            ..Default::default()
        };

        let Options::Standalone(opts) = cmd
            .load_options(TopLevelOptions {
            .load_options(&CliOptions {
                log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
                log_level: Some("debug".to_string()),

                #[cfg(feature = "tokio-console")]
                tokio_console_addr: None,
            })
            .unwrap()
        else {
@@ -609,11 +715,8 @@ mod tests {
            ..Default::default()
        };

        let top_level_opts = TopLevelOptions {
            log_dir: None,
            log_level: None,
        };
        let Options::Standalone(opts) = command.load_options(top_level_opts).unwrap()
        let Options::Standalone(opts) =
            command.load_options(&CliOptions::default()).unwrap()
        else {
            unreachable!()
        };
@@ -7,7 +7,7 @@ license.workspace = true
[dependencies]
anymap = "1.0.0-beta.2"
bitvec = "1.0"
bytes = { version = "1.1", features = ["serde"] }
bytes.workspace = true
common-error.workspace = true
common-macro.workspace = true
paste = "1.0"

@@ -216,7 +216,7 @@ mod tests {
        let bytes = StringBytes::from(hello.clone());
        assert_eq!(bytes.len(), hello.len());

        let zero = "".to_string();
        let zero = String::default();
        let bytes = StringBytes::from(zero);
        assert!(bytes.is_empty());
    }

@@ -33,7 +33,7 @@ pub const GIB: u64 = MIB * BINARY_DATA_MAGNITUDE;
pub const TIB: u64 = GIB * BINARY_DATA_MAGNITUDE;
pub const PIB: u64 = TIB * BINARY_DATA_MAGNITUDE;

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd)]
#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd)]
pub struct ReadableSize(pub u64);

impl ReadableSize {
Some files were not shown because too many files have changed in this diff.