feat: truncate region (#2097 )

* feat: impl truncate region * test: test truncate region * chore: typo * refactor: table truncate * chore: remove useless changes * chore: reset version * fix: wait for flush task to complete * fix: clippy * chore: remove useless changes * CR Co-authored-by: Yingwen <realevenyag@gmail.com> * Update src/storage/src/engine.rs Co-authored-by: Yingwen <realevenyag@gmail.com> * Update src/storage/src/engine.rs Co-authored-by: Yingwen <realevenyag@gmail.com> * Update src/storage/src/region.rs Co-authored-by: Yingwen <realevenyag@gmail.com> * Update src/storage/src/region/tests/truncate.rs Co-authored-by: Yingwen <realevenyag@gmail.com> * Update src/storage/src/region/tests/truncate.rs Co-authored-by: Yingwen <realevenyag@gmail.com> * Update src/storage/src/region/writer.rs Co-authored-by: Yingwen <realevenyag@gmail.com> * CR * Update src/storage/src/engine.rs Co-authored-by: Yingwen <realevenyag@gmail.com> * Update src/storage/src/manifest/region.rs Co-authored-by: Yingwen <realevenyag@gmail.com> --------- Co-authored-by: Yingwen <realevenyag@gmail.com>
feat(mito): Implement mito2 Wal (#2103 )
2025-12-22 22:20:02 +00:00 · 2023-08-04 12:26:25 +00:00 · 2023-08-04 11:04:25 +00:00 · 2023-08-04 17:48:11 +08:00 · 2023-08-04 09:08:07 +00:00 · 2023-08-04 08:38:40 +00:00
1082 changed files with 105286 additions and 31335 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -3,3 +3,14 @@ linker = "aarch64-linux-gnu-gcc"

 [alias]
 sqlness = "run --bin sqlness-runner --"
+
+
+[build]
+rustflags = [
+    # lints
+    # TODO: use lint configuration in cargo https://github.com/rust-lang/cargo/issues/5034
+    "-Wclippy::print_stdout",
+    "-Wclippy::print_stderr",
+    "-Wclippy::implicit_clone",
+    "-Aclippy::items_after_test_module",
+]
--- a/.dockerignore
+++ b/.dockerignore
@@ -20,6 +20,3 @@ out/

 # Rust
 target/
-
-# Git
-.git
--- a/.env.example
+++ b/.env.example
@@ -2,9 +2,20 @@
 GT_S3_BUCKET=S3 bucket
 GT_S3_ACCESS_KEY_ID=S3 access key id
 GT_S3_ACCESS_KEY=S3 secret access key
-
+GT_S3_ENDPOINT_URL=S3 endpoint url
+GT_S3_REGION=S3 region
 # Settings for oss test
 GT_OSS_BUCKET=OSS bucket
 GT_OSS_ACCESS_KEY_ID=OSS access key id
 GT_OSS_ACCESS_KEY=OSS access key
 GT_OSS_ENDPOINT=OSS endpoint
+# Settings for azblob test
+GT_AZBLOB_CONTAINER=AZBLOB container
+GT_AZBLOB_ACCOUNT_NAME=AZBLOB account name
+GT_AZBLOB_ACCOUNT_KEY=AZBLOB account key
+GT_AZBLOB_ENDPOINT=AZBLOB endpoint
+# Settings for gcs test
+GT_GCS_BUCKET=GCS bucket
+GT_GCS_SCOPE=GCS scope
+GT_GCS_CREDENTIAL_PATH=GCS credential path
+GT_GCS_ENDPOINT=GCS endpoint
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -81,6 +81,5 @@ body:
        Please walk us through and provide steps and details on how
        to reproduce the issue. If possible, provide scripts that we
        can run to trigger the bug.
-      render: bash
    validations:
      required: true
--- a/.github/actions/build-dev-builder-image/action.yml
+++ b/.github/actions/build-dev-builder-image/action.yml
@@ -0,0 +1,70 @@
+name: Build and push dev-builder image
+description: Build and push dev-builder image to DockerHub and ACR
+inputs:
+  dockerhub-image-registry:
+    description: The dockerhub image registry to store the images
+    required: false
+    default: docker.io
+  dockerhub-image-registry-username:
+    description: The dockerhub username to login to the image registry
+    required: true
+  dockerhub-image-registry-password:
+    description: The dockerhub password to login to the image registry
+    required: true
+  dockerhub-image-namespace:
+    description: The dockerhub namespace of the image registry to store the images
+    required: false
+    default: greptime
+  acr-image-registry:
+    description: The ACR image registry to store the images
+    required: true
+  acr-image-registry-username:
+    description: The ACR username to login to the image registry
+    required: true
+  acr-image-registry-password:
+    description: The ACR password to login to the image registry
+    required: true
+  acr-image-namespace:
+    description: The ACR namespace of the image registry to store the images
+    required: false
+    default: greptime
+  version:
+    description: Version of the dev-builder
+    required: false
+    default: latest
+runs:
+  using: composite
+  steps:
+    - name: Login to Dockerhub
+      uses: docker/login-action@v2
+      with:
+        registry: ${{ inputs.dockerhub-image-registry }}
+        username: ${{ inputs.dockerhub-image-registry-username }}
+        password: ${{ inputs.dockerhub-image-registry-password }}
+
+    - name: Build and push dev builder image to dockerhub
+      shell: bash
+      run:
+        make dev-builder \
+        BUILDX_MULTI_PLATFORM_BUILD=true \
+        IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
+        IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
+        IMAGE_TAG=${{ inputs.version }}
+
+    - name: Login to ACR
+      uses: docker/login-action@v2
+      continue-on-error: true
+      with:
+        registry: ${{ inputs.acr-image-registry }}
+        username: ${{ inputs.acr-image-registry-username }}
+        password: ${{ inputs.acr-image-registry-password }}
+
+    - name: Build and push dev builder image to ACR
+      shell: bash
+      continue-on-error: true
+      run: # buildx caches the layers that were already built for DockerHub, so building the images again does not take long.
+        make dev-builder \
+        BUILDX_MULTI_PLATFORM_BUILD=true \
+        IMAGE_REGISTRY=${{ inputs.acr-image-registry }} \
+        IMAGE_NAMESPACE=${{ inputs.acr-image-namespace }} \
+        IMAGE_TAG=${{ inputs.version }}
--- a/.github/actions/build-greptime-binary/action.yml
+++ b/.github/actions/build-greptime-binary/action.yml
@@ -0,0 +1,51 @@
+name: Build greptime binary
+description: Build and upload the single linux artifact
+inputs:
+  base-image:
+    description: Base image to build greptime
+    required: true
+  features:
+    description: Cargo features to build
+    required: true
+  cargo-profile:
+    description: Cargo profile to build
+    required: true
+  artifacts-dir:
+    description: Directory to store artifacts
+    required: true
+  version:
+    description: Version of the artifact
+    required: true
+  release-to-s3-bucket:
+    description: S3 bucket to store released artifacts
+    required: true
+  aws-access-key-id:
+    description: AWS access key id
+    required: true
+  aws-secret-access-key:
+    description: AWS secret access key
+    required: true
+  aws-region:
+    description: AWS region
+    required: true
+runs:
+  using: composite
+  steps:
+    - name: Build greptime binary
+      shell: bash
+      run: |
+        make build-greptime-by-buildx \
+          CARGO_PROFILE=${{ inputs.cargo-profile }} \
+          FEATURES=${{ inputs.features }} \
+          BASE_IMAGE=${{ inputs.base-image }}
+
+    - name: Upload artifacts
+      uses: ./.github/actions/upload-artifacts
+      with:
+        artifacts-dir: ${{ inputs.artifacts-dir }}
+        target-file: ./greptime
+        version: ${{ inputs.version }}
+        release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
+        aws-access-key-id: ${{ inputs.aws-access-key-id }}
+        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
+        aws-region: ${{ inputs.aws-region }}
--- a/.github/actions/build-greptime-images/action.yml
+++ b/.github/actions/build-greptime-images/action.yml
@@ -0,0 +1,104 @@
+name: Build greptime images
+description: Build and push greptime images
+inputs:
+  image-registry:
+    description: The image registry to store the images
+    required: true
+  image-registry-username:
+    description: The username to login to the image registry
+    required: true
+  image-registry-password:
+    description: The password to login to the image registry
+    required: true
+  amd64-artifact-name:
+    description: The name of the amd64 artifact for building images
+    required: true
+  arm64-artifact-name:
+    description: The name of the arm64 artifact for building images
+    required: false
+    default: ""
+  image-namespace:
+    description: The namespace of the image registry to store the images
+    required: true
+  image-name:
+    description: The name of the image to build
+    required: true
+  image-tag:
+    description: The tag of the image to build
+    required: true
+  docker-file:
+    description: The path to the Dockerfile to build
+    required: true
+  platforms:
+    description: The supported platforms to build the image
+    required: true
+  push-latest-tag:
+    description: Whether to push the latest tag
+    required: false
+    default: 'true'
+runs:
+  using: composite
+  steps:
+    - name: Login to image registry
+      uses: docker/login-action@v2
+      with:
+        registry: ${{ inputs.image-registry }}
+        username: ${{ inputs.image-registry-username }}
+        password: ${{ inputs.image-registry-password }}
+
+    - name: Set up qemu for multi-platform builds
+      uses: docker/setup-qemu-action@v2
+
+    - name: Set up buildx
+      uses: docker/setup-buildx-action@v2
+
+    - name: Download amd64 artifacts
+      uses: actions/download-artifact@v3
+      with:
+        name: ${{ inputs.amd64-artifact-name }}
+
+    - name: Unzip the amd64 artifacts
+      shell: bash
+      run: |
+        tar xvf ${{ inputs.amd64-artifact-name }}.tar.gz && \
+        rm ${{ inputs.amd64-artifact-name }}.tar.gz && \
+        rm -rf amd64 && \
+        mv ${{ inputs.amd64-artifact-name }} amd64
+
+    - name: Download arm64 artifacts
+      uses: actions/download-artifact@v3
+      if: ${{ inputs.arm64-artifact-name }}
+      with:
+        name: ${{ inputs.arm64-artifact-name }}
+
+    - name: Unzip the arm64 artifacts
+      shell: bash
+      if: ${{ inputs.arm64-artifact-name }}
+      run: |
+        tar xvf ${{ inputs.arm64-artifact-name }}.tar.gz && \
+        rm ${{ inputs.arm64-artifact-name }}.tar.gz && \
+        rm -rf arm64 && \
+        mv ${{ inputs.arm64-artifact-name }} arm64
+
+    - name: Build and push images(without latest) for amd64 and arm64
+      if: ${{ inputs.push-latest-tag == 'false' }}
+      uses: docker/build-push-action@v3
+      with:
+        context: .
+        file: ${{ inputs.docker-file }}
+        push: true
+        platforms: ${{ inputs.platforms }}
+        tags: |
+          ${{ inputs.image-registry }}/${{ inputs.image-namespace }}/${{ inputs.image-name }}:${{ inputs.image-tag }}
+
+    - name: Build and push images for amd64 and arm64
+      if: ${{ inputs.push-latest-tag == 'true' }}
+      uses: docker/build-push-action@v3
+      with:
+        context: .
+        file: ${{ inputs.docker-file }}
+        push: true
+        platforms: ${{ inputs.platforms }}
+        tags: |
+          ${{ inputs.image-registry }}/${{ inputs.image-namespace }}/${{ inputs.image-name }}:latest
+          ${{ inputs.image-registry }}/${{ inputs.image-namespace }}/${{ inputs.image-name }}:${{ inputs.image-tag }}
--- a/.github/actions/build-images/action.yml
+++ b/.github/actions/build-images/action.yml
@@ -0,0 +1,53 @@
+name: Group for building greptimedb images
+description: Group for building greptimedb images
+inputs:
+  image-registry:
+    description: The image registry to store the images
+    required: true
+  image-namespace:
+    description: The namespace of the image registry to store the images
+    required: true
+  image-registry-username:
+    description: The username to login to the image registry
+    required: true
+  image-registry-password:
+    description: The password to login to the image registry
+    required: true
+  version:
+    description: Version of the artifact
+    required: true
+  push-latest-tag:
+    description: Whether to push the latest tag
+    required: false
+    default: 'true'
+runs:
+  using: composite
+  steps:
+    - name: Build and push standard images to dockerhub
+      uses: ./.github/actions/build-greptime-images
+      with:
+        image-registry: ${{ inputs.image-registry }}
+        image-namespace: ${{ inputs.image-namespace }}
+        image-registry-username: ${{ inputs.image-registry-username }}
+        image-registry-password: ${{ inputs.image-registry-password }}
+        image-name: greptimedb
+        image-tag: ${{ inputs.version }}
+        docker-file: docker/ci/Dockerfile
+        amd64-artifact-name: greptime-linux-amd64-pyo3-${{ inputs.version }}
+        arm64-artifact-name: greptime-linux-arm64-pyo3-${{ inputs.version }}
+        platforms: linux/amd64,linux/arm64
+        push-latest-tag: ${{ inputs.push-latest-tag }}
+
+    - name: Build and push centos images to dockerhub
+      uses: ./.github/actions/build-greptime-images
+      with:
+        image-registry: ${{ inputs.image-registry }}
+        image-namespace: ${{ inputs.image-namespace }}
+        image-registry-username: ${{ inputs.image-registry-username }}
+        image-registry-password: ${{ inputs.image-registry-password }}
+        image-name: greptimedb-centos
+        image-tag: ${{ inputs.version }}
+        docker-file: docker/ci/Dockerfile-centos
+        amd64-artifact-name: greptime-linux-amd64-centos-${{ inputs.version }}
+        platforms: linux/amd64
+        push-latest-tag: ${{ inputs.push-latest-tag }}
--- a/.github/actions/build-linux-artifacts/action.yml
+++ b/.github/actions/build-linux-artifacts/action.yml
@@ -0,0 +1,84 @@
+name: Build linux artifacts
+description: Build linux artifacts
+inputs:
+  arch:
+    description: Architecture to build
+    required: true
+  cargo-profile:
+    description: Cargo profile to build
+    required: true
+  version:
+    description: Version of the artifact
+    required: true
+  disable-run-tests:
+    description: Disable running integration tests
+    required: true
+  release-to-s3-bucket:
+    description: S3 bucket to store released artifacts
+    required: true
+  aws-access-key-id:
+    description: AWS access key id
+    required: true
+  aws-secret-access-key:
+    description: AWS secret access key
+    required: true
+  aws-region:
+    description: AWS region
+    required: true
+runs:
+  using: composite
+  steps:
+    - name: Run integration test
+      if: ${{ inputs.disable-run-tests == 'false' }}
+      shell: bash
+      # NOTE: If BUILD_JOBS > 4, the build always runs out of memory (OOM) on the EC2 instance.
+      run: |
+        make run-it-in-container BUILD_JOBS=4
+
+    - name: Upload sqlness logs
+      if: ${{ failure() && inputs.disable-run-tests == 'false' }} # Only upload logs when the integration tests failed.
+      uses: actions/upload-artifact@v3
+      with:
+        name: sqlness-logs
+        path: /tmp/greptime-*.log
+        retention-days: 3
+
+    - name: Build standard greptime
+      uses: ./.github/actions/build-greptime-binary
+      with:
+        base-image: ubuntu
+        features: pyo3_backend,servers/dashboard
+        cargo-profile: ${{ inputs.cargo-profile }}
+        artifacts-dir: greptime-linux-${{ inputs.arch }}-pyo3-${{ inputs.version }}
+        version: ${{ inputs.version }}
+        release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
+        aws-access-key-id: ${{ inputs.aws-access-key-id }}
+        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
+        aws-region: ${{ inputs.aws-region }}
+
+    - name: Build greptime without pyo3
+      uses: ./.github/actions/build-greptime-binary
+      with:
+        base-image: ubuntu
+        features: servers/dashboard
+        cargo-profile: ${{ inputs.cargo-profile }}
+        artifacts-dir: greptime-linux-${{ inputs.arch }}-${{ inputs.version }}
+        version: ${{ inputs.version }}
+        release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
+        aws-access-key-id: ${{ inputs.aws-access-key-id }}
+        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
+        aws-region: ${{ inputs.aws-region }}
+
+    - name: Build greptime on centos base image
+      uses: ./.github/actions/build-greptime-binary
+      if: ${{ inputs.arch == 'amd64' }} # Only build centos7 base image for amd64.
+      with:
+        base-image: centos
+        features: servers/dashboard
+        cargo-profile: ${{ inputs.cargo-profile }}
+        artifacts-dir: greptime-linux-${{ inputs.arch }}-centos-${{ inputs.version }}
+        version: ${{ inputs.version }}
+        release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
+        aws-access-key-id: ${{ inputs.aws-access-key-id }}
+        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
+        aws-region: ${{ inputs.aws-region }}
--- a/.github/actions/build-macos-artifacts/action.yml
+++ b/.github/actions/build-macos-artifacts/action.yml
@@ -0,0 +1,105 @@
+name: Build macos artifacts
+description: Build macos artifacts
+inputs:
+  arch:
+    description: Architecture to build
+    required: true
+  rust-toolchain:
+    description: Rust toolchain to use
+    required: true
+  cargo-profile:
+    description: Cargo profile to build
+    required: true
+  features:
+    description: Cargo features to build
+    required: true
+  version:
+    description: Version of the artifact
+    required: true
+  disable-run-tests:
+    description: Disable running integration tests
+    required: true
+  release-to-s3-bucket:
+    description: S3 bucket to store released artifacts
+    required: true
+  artifacts-dir:
+    description: Directory to store artifacts
+    required: true
+  aws-access-key-id:
+    description: AWS access key id
+    required: true
+  aws-secret-access-key:
+    description: AWS secret access key
+    required: true
+  aws-region:
+    description: AWS region
+    required: true
+runs:
+  using: composite
+  steps:
+    - name: Cache cargo assets
+      id: cache
+      uses: actions/cache@v3
+      with:
+        path: |
+          ~/.cargo/bin/
+          ~/.cargo/registry/index/
+          ~/.cargo/registry/cache/
+          ~/.cargo/git/db/
+          target/
+        key: ${{ inputs.arch }}-build-cargo-${{ hashFiles('**/Cargo.lock') }}
+
+    - name: Install protoc
+      shell: bash
+      run: |
+        brew install protobuf
+
+    - name: Install rust toolchain
+      uses: dtolnay/rust-toolchain@master
+      with:
+        toolchain: ${{ inputs.rust-toolchain }}
+        targets: ${{ inputs.arch }}
+
+    - name: Start etcd # For integration tests.
+      if: ${{ inputs.disable-run-tests == 'false' }}
+      shell: bash
+      run: |
+        brew install etcd && \
+        brew services start etcd
+
+    - name: Install latest nextest release # For integration tests.
+      if: ${{ inputs.disable-run-tests == 'false' }}
+      uses: taiki-e/install-action@nextest
+
+    - name: Run integration tests
+      if: ${{ inputs.disable-run-tests == 'false' }}
+      shell: bash
+      run: |
+        make test sqlness-test
+
+    - name: Upload sqlness logs
+      if: ${{ failure() }} # Only upload logs when the integration tests failed.
+      uses: actions/upload-artifact@v3
+      with:
+        name: sqlness-logs
+        path: /tmp/greptime-*.log
+        retention-days: 3
+
+    - name: Build greptime binary
+      shell: bash
+      run: |
+        make build \
+        CARGO_PROFILE=${{ inputs.cargo-profile }} \
+        FEATURES=${{ inputs.features }} \
+        TARGET=${{ inputs.arch }}
+
+    - name: Upload artifacts
+      uses: ./.github/actions/upload-artifacts
+      with:
+        artifacts-dir: ${{ inputs.artifacts-dir }}
+        target-file: target/${{ inputs.arch }}/${{ inputs.cargo-profile }}/greptime
+        version: ${{ inputs.version }}
+        release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
+        aws-access-key-id: ${{ inputs.aws-access-key-id }}
+        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
+        aws-region: ${{ inputs.aws-region }}
--- a/.github/actions/release-artifacts/action.yml
+++ b/.github/actions/release-artifacts/action.yml
@@ -0,0 +1,50 @@
+name: Release artifacts
+description: Release artifacts
+inputs:
+  version:
+    description: Version to release
+    required: true
+runs:
+  using: composite
+  steps:
+    # Download artifacts from previous jobs, the artifacts will be downloaded to:
+    # ${WORKING_DIR}
+    #   |- greptime-darwin-amd64-pyo3-v0.5.0/greptime-darwin-amd64-pyo3-v0.5.0.tar.gz
+    #   |- greptime-darwin-amd64-pyo3-v0.5.0.sha256sum/greptime-darwin-amd64-pyo3-v0.5.0.sha256sum
+    #   |- greptime-darwin-amd64-v0.5.0/greptime-darwin-amd64-v0.5.0.tar.gz
+    #   |- greptime-darwin-amd64-v0.5.0.sha256sum/greptime-darwin-amd64-v0.5.0.sha256sum
+    #   ...
+    - name: Download artifacts
+      uses: actions/download-artifact@v3
+
+    - name: Create git tag for release
+      if: ${{ github.event_name != 'push' }} # Meaning this is a scheduled or manual workflow.
+      shell: bash
+      run: |
+        git tag ${{ inputs.version }}
+
+    # Publish a regular (latest, non-prerelease) release only when the tag looks like v1.0.0, v1.0.1, v1.0.2, etc.; any other version is published as a prerelease.
+    - name: Set release arguments
+      shell: bash
+      run: |
+        if [[ "${{ inputs.version }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+          echo "prerelease=false" >> $GITHUB_ENV
+          echo "makeLatest=true" >> $GITHUB_ENV
+          echo "generateReleaseNotes=false" >> $GITHUB_ENV
+        else
+          echo "prerelease=true" >> $GITHUB_ENV
+          echo "makeLatest=false" >> $GITHUB_ENV
+          echo "generateReleaseNotes=true" >> $GITHUB_ENV
+        fi
+
+    - name: Publish release
+      uses: ncipollo/release-action@v1
+      with:
+        name: "Release ${{ inputs.version }}"
+        prerelease: ${{ env.prerelease }}
+        makeLatest: ${{ env.makeLatest }}
+        tag: ${{ inputs.version }}
+        generateReleaseNotes: ${{ env.generateReleaseNotes }}
+        allowUpdates: true
+        artifacts: |
+          **/greptime-*/*
--- a/.github/actions/start-runner/action.yml
+++ b/.github/actions/start-runner/action.yml
@@ -0,0 +1,67 @@
+name: Start EC2 runner
+description: Start EC2 runner
+inputs:
+  runner:
+    description: The linux runner name
+    required: true
+  aws-access-key-id:
+    description: AWS access key id
+    required: true
+  aws-secret-access-key:
+    description: AWS secret access key
+    required: true
+  aws-region:
+    description: AWS region
+    required: true
+  github-token:
+    description: The GitHub token to clone private repository
+    required: false
+    default: ""
+  image-id:
+    description: The EC2 image id
+    required: true
+  security-group-id:
+    description: The EC2 security group id
+    required: true
+  subnet-id:
+    description: The EC2 subnet id
+    required: true
+outputs:
+  label:
+    description: "label"
+    value: ${{ steps.start-linux-arm64-ec2-runner.outputs.label || inputs.runner }}
+  ec2-instance-id:
+    description: "ec2-instance-id"
+    value: ${{ steps.start-linux-arm64-ec2-runner.outputs.ec2-instance-id }}
+runs:
+  using: composite
+  steps:
+    - name: Configure AWS credentials
+      if: startsWith(inputs.runner, 'ec2')
+      uses: aws-actions/configure-aws-credentials@v2
+      with:
+        aws-access-key-id: ${{ inputs.aws-access-key-id }}
+        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
+        aws-region: ${{ inputs.aws-region }}
+
+    # The EC2 runner will use the following format:
+    # <vm-type>-<instance-type>-<arch>
+    # like 'ec2-c6a.4xlarge-amd64'.
+    - name: Get EC2 instance type
+      if: startsWith(inputs.runner, 'ec2')
+      id: get-ec2-instance-type
+      shell: bash
+      run: |
+        echo "instance-type=$(echo ${{ inputs.runner }} | cut -d'-' -f2)" >> $GITHUB_OUTPUT
+
+    - name: Start EC2 runner
+      if: startsWith(inputs.runner, 'ec2')
+      uses: machulav/ec2-github-runner@v2
+      id: start-linux-arm64-ec2-runner
+      with:
+        mode: start
+        ec2-image-id: ${{ inputs.image-id }}
+        ec2-instance-type: ${{ steps.get-ec2-instance-type.outputs.instance-type }}
+        subnet-id: ${{ inputs.subnet-id }}
+        security-group-id: ${{ inputs.security-group-id }}
+        github-token: ${{ inputs.github-token }}
--- a/.github/actions/stop-runner/action.yml
+++ b/.github/actions/stop-runner/action.yml
@@ -0,0 +1,41 @@
+name: Stop EC2 runner
+description: Stop EC2 runner
+inputs:
+  label:
+    description: The linux runner name
+    required: true
+  ec2-instance-id:
+    description: The EC2 instance id
+    required: true
+  aws-access-key-id:
+    description: AWS access key id
+    required: true
+  aws-secret-access-key:
+    description: AWS secret access key
+    required: true
+  aws-region:
+    description: AWS region
+    required: true
+  github-token:
+    description: The GitHub token to clone private repository
+    required: false
+    default: ""
+runs:
+  using: composite
+  steps:
+    - name: Configure AWS credentials
+      if: ${{ inputs.label && inputs.ec2-instance-id }}
+      uses: aws-actions/configure-aws-credentials@v2
+      with:
+        aws-access-key-id: ${{ inputs.aws-access-key-id }}
+        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
+        aws-region: ${{ inputs.aws-region }}
+
+    - name: Stop EC2 runner
+      if: ${{ inputs.label && inputs.ec2-instance-id }}
+      uses: machulav/ec2-github-runner@v2
+      with:
+        mode: stop
+        label: ${{ inputs.label }}
+        ec2-instance-id: ${{ inputs.ec2-instance-id }}
+        github-token: ${{ inputs.github-token }}
--- a/.github/actions/upload-artifacts/action.yml
+++ b/.github/actions/upload-artifacts/action.yml
@@ -0,0 +1,82 @@
+name: Upload artifacts
+description: Upload artifacts
+inputs:
+  artifacts-dir:
+    description: Directory to store artifacts
+    required: true
+  target-file:
+    description: The path of the target artifact
+    required: true
+  version:
+    description: Version of the artifact
+    required: true
+  release-to-s3-bucket:
+    description: S3 bucket to store released artifacts
+    required: true
+  aws-access-key-id:
+    description: AWS access key id
+    required: true
+  aws-secret-access-key:
+    description: AWS secret access key
+    required: true
+  aws-region:
+    description: AWS region
+    required: true
+runs:
+  using: composite
+  steps:
+    - name: Create artifacts directory
+      shell: bash
+      run: |
+        mkdir -p ${{ inputs.artifacts-dir }} && \
+        mv ${{ inputs.target-file }} ${{ inputs.artifacts-dir }}
+
+    # The compressed artifacts will use the following layout:
+    # greptime-linux-amd64-pyo3-v0.3.0.sha256sum
+    # greptime-linux-amd64-pyo3-v0.3.0.tar.gz
+    #   greptime-linux-amd64-pyo3-v0.3.0
+    #   └── greptime
+    - name: Compress artifacts and calculate checksum
+      shell: bash
+      run: |
+        tar -zcvf ${{ inputs.artifacts-dir }}.tar.gz ${{ inputs.artifacts-dir }} && \
+        echo $(shasum -a 256 ${{ inputs.artifacts-dir }}.tar.gz | cut -f1 -d' ') > ${{ inputs.artifacts-dir }}.sha256sum
+
+    # Note: The artifacts will be double zip compressed(related issue: https://github.com/actions/upload-artifact/issues/39).
+    # However, when we use 'actions/download-artifact@v3' to download the artifacts, it will be automatically unzipped.
+    - name: Upload artifacts
+      uses: actions/upload-artifact@v3
+      with:
+        name: ${{ inputs.artifacts-dir }}
+        path: ${{ inputs.artifacts-dir }}.tar.gz
+
+    - name: Upload checksum
+      uses: actions/upload-artifact@v3
+      with:
+        name: ${{ inputs.artifacts-dir }}.sha256sum
+        path: ${{ inputs.artifacts-dir }}.sha256sum
+
+    - name: Configure AWS credentials
+      uses: aws-actions/configure-aws-credentials@v2
+      with:
+        aws-access-key-id: ${{ inputs.aws-access-key-id }}
+        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
+        aws-region: ${{ inputs.aws-region }}
+
+    - name: Upload artifacts to S3
+      shell: bash
+      # The bucket layout will be:
+      # releases/greptimedb
+      # ├── v0.1.0
+      # │   ├── greptime-darwin-amd64-pyo3-v0.1.0.sha256sum
+      # │   └── greptime-darwin-amd64-pyo3-v0.1.0.tar.gz
+      # └── v0.2.0
+      #    ├── greptime-darwin-amd64-pyo3-v0.2.0.sha256sum
+      #    └── greptime-darwin-amd64-pyo3-v0.2.0.tar.gz
+      run: |
+        aws s3 cp \
+          ${{ inputs.artifacts-dir }}.tar.gz \
+          s3://${{ inputs.release-to-s3-bucket }}/releases/greptimedb/${{ inputs.version }}/${{ inputs.artifacts-dir }}.tar.gz && \
+        aws s3 cp \
+          ${{ inputs.artifacts-dir }}.sha256sum \
+          s3://${{ inputs.release-to-s3-bucket }}/releases/greptimedb/${{ inputs.version }}/${{ inputs.artifacts-dir }}.sha256sum
--- a/.github/scripts/create-version.sh
+++ b/.github/scripts/create-version.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+
+set -e
+
+# - If it's a tag push release, the version is the tag name(${{ github.ref_name }});
+# - If it's a scheduled release, the version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-$buildTime', like 'v0.2.0-nightly-20230313';
+# - If it's a manual release, the version is '${{ env.NEXT_RELEASE_VERSION }}-$(git rev-parse --short HEAD)-YYYYMMDDSS', like 'v0.2.0-e5b243c-2023071245';
+# - If it's a nightly build, the version is 'nightly-YYYYMMDD-$(git rev-parse --short HEAD)', like 'nightly-20230712-e5b243c'.
+# Usage: set GITHUB_EVENT_NAME, NEXT_RELEASE_VERSION and NIGHTLY_RELEASE_PREFIX in the environment, then call create_version.
+function create_version() {
+  # Inputs are read from environment variables; diagnostics go to stderr so that stdout carries only the version.
+  if [ -z "$GITHUB_EVENT_NAME" ]; then
+      echo "GITHUB_EVENT_NAME is empty" >&2
+      exit 1
+  fi
+
+  if [ -z "$NEXT_RELEASE_VERSION" ]; then
+      echo "NEXT_RELEASE_VERSION is empty" >&2
+      exit 1
+  fi
+
+  if [ -z "$NIGHTLY_RELEASE_PREFIX" ]; then
+      echo "NIGHTLY_RELEASE_PREFIX is empty" >&2
+      exit 1
+  fi
+
+  # NEXT_RELEASE_VERSION=dev marks a nightly build: print '<prefix>-YYYYMMDD-<short sha>' and stop.
+  if [ "$NEXT_RELEASE_VERSION" = dev ]; then
+    echo "$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")-$(git rev-parse --short HEAD)"
+    exit 0
+  fi
+
+  # Note: Print only the bare version string to stdout, and only when everything is ok, so that callers can capture it for GitHub Actions outputs.
+  if [ "$GITHUB_EVENT_NAME" = push ]; then
+    if [ -z "$GITHUB_REF_NAME" ]; then
+      echo "GITHUB_REF_NAME is empty in push event" >&2
+      exit 1
+    fi
+    echo "$GITHUB_REF_NAME"
+  elif [ "$GITHUB_EVENT_NAME" = workflow_dispatch ]; then
+    echo "$NEXT_RELEASE_VERSION-$(git rev-parse --short HEAD)-$(date "+%Y%m%d%S")"
+  elif [ "$GITHUB_EVENT_NAME" = schedule ]; then
+    echo "$NEXT_RELEASE_VERSION-$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")"
+  else
+    echo "Unsupported GITHUB_EVENT_NAME: $GITHUB_EVENT_NAME" >&2
+    exit 1
+  fi
+}
+
+# You can run it as in the following examples:
+#  GITHUB_EVENT_NAME=push NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly GITHUB_REF_NAME=v0.3.0 ./create-version.sh
+#  GITHUB_EVENT_NAME=workflow_dispatch NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
+#  GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
+#  GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=dev NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
+create_version
--- a/.github/workflows/apidoc.yml
+++ b/.github/workflows/apidoc.yml
@@ -13,7 +13,7 @@ on:
 name: Build API docs

 env:
-  RUST_TOOLCHAIN: nightly-2023-02-26
+  RUST_TOOLCHAIN: nightly-2023-05-03

 jobs:
  apidoc:
--- a/.github/workflows/develop.yml
+++ b/.github/workflows/develop.yml
@@ -1,4 +1,5 @@
 on:
+  merge_group:
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    paths-ignore:
@@ -24,7 +25,7 @@ on:
 name: CI

 env:
-  RUST_TOOLCHAIN: nightly-2023-02-26
+  RUST_TOOLCHAIN: nightly-2023-05-03

 jobs:
  typos:
@@ -50,7 +51,7 @@ jobs:
      - name: Rust Cache
        uses: Swatinem/rust-cache@v2
      - name: Run cargo check
-        run: cargo check --workspace --all-targets
+        run: cargo check --locked --workspace --all-targets

  toml:
    name: Toml Check
@@ -67,7 +68,7 @@ jobs:
      - name: Install taplo
        run: cargo install taplo-cli --version ^0.8 --locked
      - name: Run taplo
-        run: taplo format --check --option "indent_string=    "
+        run: taplo format --check

  # Use coverage to run test.
  # test:
@@ -116,7 +117,6 @@ jobs:
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest-8-cores
    timeout-minutes: 60
-    needs: [clippy]
    steps:
      - uses: actions/checkout@v3
      - uses: arduino/setup-protoc@v1
@@ -141,6 +141,7 @@ jobs:
      - name: Run sqlness
        run: cargo sqlness && ls /tmp
      - name: Upload sqlness logs
+        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: sqlness-logs
@@ -183,13 +184,12 @@ jobs:
      - name: Rust Cache
        uses: Swatinem/rust-cache@v2
      - name: Run cargo clippy
-        run: cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr
+        run: cargo clippy --workspace --all-targets -- -D warnings

  coverage:
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest-8-cores
    timeout-minutes: 60
-    needs: [clippy]
    steps:
      - uses: actions/checkout@v3
      - uses: arduino/setup-protoc@v1
@@ -216,7 +216,7 @@ jobs:
      - name: Install cargo-llvm-cov
        uses: taiki-e/install-action@cargo-llvm-cov
      - name: Collect coverage data
-        run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F pyo3_backend
+        run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F pyo3_backend -F dashboard
        env:
          CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
          RUST_BACKTRACE: 1
@@ -224,6 +224,7 @@ jobs:
          GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
          GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
          GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
+          GT_S3_REGION: ${{ secrets.S3_REGION }}
          UNITTEST_LOG_DIR: "__unittest_logs"
      - name: Codecov upload
        uses: codecov/codecov-action@v2
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -1,4 +1,5 @@
 on:
+  merge_group:
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    paths:
@@ -27,6 +28,13 @@ name: CI
 # https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/defining-the-mergeability-of-pull-requests/troubleshooting-required-status-checks#handling-skipped-but-required-checks

 jobs:
+  typos:
+    name: Spell Check with Typos
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: crate-ci/typos@v1.13.10
+
  check:
    name: Check
    if: github.event.pull_request.draft == false
@@ -53,3 +61,10 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - run: 'echo "No action required"'
+
+  sqlness:
+    name: Sqlness Test
+    if: github.event.pull_request.draft == false
+    runs-on: ubuntu-latest
+    steps:
+      - run: 'echo "No action required"'
--- a/.github/workflows/nightly-build.yml
+++ b/.github/workflows/nightly-build.yml
@@ -0,0 +1,309 @@
+# The nightly build only does the following things:
+# 1. Run integration tests;
+# 2. Build binaries and images for linux-amd64 and linux-arm64 platform;
+name: GreptimeDB Nightly Build
+
+on:
+  schedule:
+    # Trigger at 00:00(UTC) on every day-of-week from Monday through Friday.
+    - cron: '0 0 * * 1-5'
+  workflow_dispatch: # Allows you to run this workflow manually.
+    inputs:
+      linux_amd64_runner:
+        type: choice
+        description: The runner uses to build linux-amd64 artifacts
+        default: ec2-c6i.2xlarge-amd64
+        options:
+          - ubuntu-latest
+          - ubuntu-latest-8-cores
+          - ubuntu-latest-16-cores
+          - ubuntu-latest-32-cores
+          - ubuntu-latest-64-cores
+          - ec2-c6i.xlarge-amd64 # 4C8G
+          - ec2-c6i.2xlarge-amd64 # 8C16G
+          - ec2-c6i.4xlarge-amd64 # 16C32G
+          - ec2-c6i.8xlarge-amd64 # 32C64G
+          - ec2-c6i.16xlarge-amd64 # 64C128G
+      linux_arm64_runner:
+        type: choice
+        description: The runner uses to build linux-arm64 artifacts
+        default: ec2-c6g.2xlarge-arm64
+        options:
+          - ec2-c6g.xlarge-arm64 # 4C8G
+          - ec2-c6g.2xlarge-arm64 # 8C16G
+          - ec2-c6g.4xlarge-arm64 # 16C32G
+          - ec2-c6g.8xlarge-arm64 # 32C64G
+          - ec2-c6g.16xlarge-arm64 # 64C128G
+      skip_test:
+        description: Do not run integration tests during the build
+        type: boolean
+        default: true
+      build_linux_amd64_artifacts:
+        type: boolean
+        description: Build linux-amd64 artifacts
+        required: false
+        default: false
+      build_linux_arm64_artifacts:
+        type: boolean
+        description: Build linux-arm64 artifacts
+        required: false
+        default: false
+      release_images:
+        type: boolean
+        description: Build and push images to DockerHub and ACR
+        required: false
+        default: false
+
+# Use env variables to control all the release process.
+env:
+  CARGO_PROFILE: nightly
+
+  # Controls whether to run tests, including unit tests, integration tests and sqlness.
+  DISABLE_RUN_TESTS: ${{ inputs.skip_test || vars.DEFAULT_SKIP_TEST }}
+
+  # Always use 'dev' to indicate it's the nightly build.
+  NEXT_RELEASE_VERSION: dev
+
+  NIGHTLY_RELEASE_PREFIX: nightly
+
+jobs:
+  allocate-runners:
+    name: Allocate runners
+    if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
+    runs-on: ubuntu-latest
+    outputs:
+      linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
+      linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
+
+      # The following EC2 resource id will be used for resource releasing.
+      linux-amd64-ec2-runner-label: ${{ steps.start-linux-amd64-runner.outputs.label }}
+      linux-amd64-ec2-runner-instance-id: ${{ steps.start-linux-amd64-runner.outputs.ec2-instance-id }}
+      linux-arm64-ec2-runner-label: ${{ steps.start-linux-arm64-runner.outputs.label }}
+      linux-arm64-ec2-runner-instance-id: ${{ steps.start-linux-arm64-runner.outputs.ec2-instance-id }}
+
+      # The 'version' is used as the global tag name of the release workflow.
+      version: ${{ steps.create-version.outputs.version }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Create version
+        id: create-version
+        run: |
+          version=$(./.github/scripts/create-version.sh) && \
+          echo $version && \
+          echo "version=$version" >> $GITHUB_OUTPUT
+        env:
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
+          GITHUB_REF_NAME: ${{ github.ref_name }}
+          NEXT_RELEASE_VERSION: ${{ env.NEXT_RELEASE_VERSION }}
+          NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }}
+
+      - name: Allocate linux-amd64 runner
+        if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'schedule' }}
+        uses: ./.github/actions/start-runner
+        id: start-linux-amd64-runner
+        with:
+          runner: ${{ inputs.linux_amd64_runner || vars.DEFAULT_AMD64_RUNNER }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.EC2_RUNNER_REGION }}
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          image-id: ${{ vars.EC2_RUNNER_LINUX_AMD64_IMAGE_ID }}
+          security-group-id: ${{ vars.EC2_RUNNER_SECURITY_GROUP_ID }}
+          subnet-id: ${{ vars.EC2_RUNNER_SUBNET_ID }}
+
+      - name: Allocate linux-arm64 runner
+        if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'schedule' }}
+        uses: ./.github/actions/start-runner
+        id: start-linux-arm64-runner
+        with:
+          runner: ${{ inputs.linux_arm64_runner || vars.DEFAULT_ARM64_RUNNER }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.EC2_RUNNER_REGION }}
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          image-id: ${{ vars.EC2_RUNNER_LINUX_ARM64_IMAGE_ID }}
+          security-group-id: ${{ vars.EC2_RUNNER_SECURITY_GROUP_ID }}
+          subnet-id: ${{ vars.EC2_RUNNER_SUBNET_ID }}
+
+  build-linux-amd64-artifacts:
+    name: Build linux-amd64 artifacts
+    if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'schedule' }}
+    needs: [
+      allocate-runners,
+    ]
+    runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - uses: ./.github/actions/build-linux-artifacts
+        with:
+          arch: amd64
+          cargo-profile: ${{ env.CARGO_PROFILE }}
+          version: ${{ needs.allocate-runners.outputs.version }}
+          disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
+          release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
+          aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
+
+  build-linux-arm64-artifacts:
+    name: Build linux-arm64 artifacts
+    if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'schedule' }}
+    needs: [
+      allocate-runners,
+    ]
+    runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - uses: ./.github/actions/build-linux-artifacts
+        with:
+          arch: arm64
+          cargo-profile: ${{ env.CARGO_PROFILE }}
+          version: ${{ needs.allocate-runners.outputs.version }}
+          disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
+          release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
+          aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
+
+  release-images-to-dockerhub:
+    name: Build and push images to DockerHub
+    if: ${{ inputs.release_images || github.event_name == 'schedule' }}
+    needs: [
+      allocate-runners,
+      build-linux-amd64-artifacts,
+      build-linux-arm64-artifacts,
+    ]
+    runs-on: ubuntu-latest
+    outputs:
+      nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Build and push images to dockerhub
+        uses: ./.github/actions/build-images
+        with:
+          image-registry: docker.io
+          image-namespace: ${{ vars.IMAGE_NAMESPACE }}
+          image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
+          image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
+          version: ${{ needs.allocate-runners.outputs.version }}
+          push-latest-tag: false # Don't push the latest tag to registry.
+
+      - name: Set nightly build result
+        id: set-nightly-build-result
+        run: |
+          echo "nightly-build-result=success" >> $GITHUB_OUTPUT
+
+  release-images-to-acr:
+    name: Build and push images to ACR
+    if: ${{ inputs.release_images || github.event_name == 'schedule' }}
+    needs: [
+      allocate-runners,
+      build-linux-amd64-artifacts,
+      build-linux-arm64-artifacts,
+    ]
+    runs-on: ubuntu-latest
+    # When we push to ACR, it's easy to fail due to some unknown network issues.
+    # However, we don't want to fail the whole workflow because of this.
+    # ACR syncs daily with DockerHub, so don't worry about the image not being updated.
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Build and push images to ACR
+        uses: ./.github/actions/build-images
+        with:
+          image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
+          image-namespace: ${{ vars.IMAGE_NAMESPACE }}
+          image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
+          image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
+          version: ${{ needs.allocate-runners.outputs.version }}
+          push-latest-tag: false # Don't push the latest tag to registry.
+
+  stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
+    name: Stop linux-amd64 runner
+    # Always run this job, even if earlier jobs failed, so that an allocated runner gets released.
+    if: ${{ always() }}
+    runs-on: ubuntu-latest
+    needs: [
+      allocate-runners,
+      build-linux-amd64-artifacts,
+    ]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Stop EC2 runner
+        uses: ./.github/actions/stop-runner
+        with:
+          label: ${{ needs.allocate-runners.outputs.linux-amd64-ec2-runner-label }}
+          ec2-instance-id: ${{ needs.allocate-runners.outputs.linux-amd64-ec2-runner-instance-id }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.EC2_RUNNER_REGION }}
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+
+  stop-linux-arm64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
+    name: Stop linux-arm64 runner
+    # Always run this job, even if earlier jobs failed, so that an allocated runner gets released.
+    if: ${{ always() }}
+    runs-on: ubuntu-latest
+    needs: [
+      allocate-runners,
+      build-linux-arm64-artifacts,
+    ]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Stop EC2 runner
+        uses: ./.github/actions/stop-runner
+        with:
+          label: ${{ needs.allocate-runners.outputs.linux-arm64-ec2-runner-label }}
+          ec2-instance-id: ${{ needs.allocate-runners.outputs.linux-arm64-ec2-runner-instance-id }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.EC2_RUNNER_REGION }}
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+
+  notification:
+    if: ${{ always() }} # Not requiring successful dependent jobs, always run.
+    name: Send notification to Greptime team
+    needs: [
+      release-images-to-dockerhub
+    ]
+    runs-on: ubuntu-latest
+    env:
+      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
+    steps:
+      - name: Notifiy nightly build successful result
+        uses: slackapi/slack-github-action@v1.23.0
+        if: ${{ needs.release-images-to-dockerhub.outputs.nightly-build-result == 'success' }}
+        with:
+          payload: |
+            {"text": "GreptimeDB nightly build successful"}
+
+      - name: Notifiy nightly build failed result
+        uses: slackapi/slack-github-action@v1.23.0
+        if: ${{ needs.release-images-to-dockerhub.outputs.nightly-build-result != 'success' }}
+        with:
+          payload: |
+            {"text": "GreptimeDB nightly build failed, please check 'https://github.com/GreptimeTeam/greptimedb/actions/workflows/nightly-build.yml'"}
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,3 +1,8 @@
+name: Release
+
+# There are two kinds of formal release:
+# 1. The tag('v*.*.*') push release: the release workflow will be triggered by the tag push event.
+# 2. The scheduled release(the version will be '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD'): the release workflow will be triggered by the schedule event.
 on:
  push:
    tags:
@@ -5,278 +10,386 @@ on:
  schedule:
    # At 00:00 on Monday.
    - cron: '0 0 * * 1'
-  workflow_dispatch:
-
-name: Release
+  workflow_dispatch: # Allows you to run this workflow manually.
+    # Note: GitHub Actions supports ONLY 10 inputs, and all of them are already used up.
+    inputs:
+      linux_amd64_runner:
+        type: choice
+        description: The runner uses to build linux-amd64 artifacts
+        default: ec2-c6i.4xlarge-amd64
+        options:
+          - ubuntu-latest
+          - ubuntu-latest-8-cores
+          - ubuntu-latest-16-cores
+          - ubuntu-latest-32-cores
+          - ubuntu-latest-64-cores
+          - ec2-c6i.xlarge-amd64 # 4C8G
+          - ec2-c6i.2xlarge-amd64 # 8C16G
+          - ec2-c6i.4xlarge-amd64 # 16C32G
+          - ec2-c6i.8xlarge-amd64 # 32C64G
+          - ec2-c6i.16xlarge-amd64 # 64C128G
+      linux_arm64_runner:
+        type: choice
+        description: The runner uses to build linux-arm64 artifacts
+        default: ec2-c6g.4xlarge-arm64
+        options:
+          - ec2-c6g.xlarge-arm64 # 4C8G
+          - ec2-c6g.2xlarge-arm64 # 8C16G
+          - ec2-c6g.4xlarge-arm64 # 16C32G
+          - ec2-c6g.8xlarge-arm64 # 32C64G
+          - ec2-c6g.16xlarge-arm64 # 64C128G
+      macos_runner:
+        type: choice
+        description: The runner uses to build macOS artifacts
+        default: macos-latest
+        options:
+          - macos-latest
+      skip_test:
+        description: Do not run integration tests during the build
+        type: boolean
+        default: true
+      build_linux_amd64_artifacts:
+        type: boolean
+        description: Build linux-amd64 artifacts
+        required: false
+        default: false
+      build_linux_arm64_artifacts:
+        type: boolean
+        description: Build linux-arm64 artifacts
+        required: false
+        default: false
+      build_macos_artifacts:
+        type: boolean
+        description: Build macos artifacts
+        required: false
+        default: false
+      release_artifacts:
+        type: boolean
+        description: Create GitHub release and upload artifacts
+        required: false
+        default: false
+      release_images:
+        type: boolean
+        description: Build and push images to DockerHub and ACR
+        required: false
+        default: false
+      release_dev_builder_image:
+        type: boolean
+        description: Release dev-builder image
+        required: false
+        default: false

+# Use env variables to control all the release process.
 env:
-  RUST_TOOLCHAIN: nightly-2023-02-26
-
-  SCHEDULED_BUILD_VERSION_PREFIX: v0.2.0
-
-  SCHEDULED_PERIOD: nightly
-
+  # The arguments of building greptime.
+  RUST_TOOLCHAIN: nightly-2023-05-03
  CARGO_PROFILE: nightly

+  # Controls whether to run tests, including unit tests, integration tests and sqlness.
+  DISABLE_RUN_TESTS: ${{ inputs.skip_test || vars.DEFAULT_SKIP_TEST }}
+
+  # The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nightly-20230313.
+  NIGHTLY_RELEASE_PREFIX: nightly
+  # Note: NEXT_RELEASE_VERSION must be updated manually for every formal release.
+  NEXT_RELEASE_VERSION: v0.4.0
+
 jobs:
-  build:
-    name: Build binary
+  allocate-runners:
+    name: Allocate runners
+    if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
+    runs-on: ubuntu-latest
+    outputs:
+      linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
+      linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
+      macos-runner: ${{ inputs.macos_runner || vars.DEFAULT_MACOS_RUNNER }}
+
+      # The following EC2 resource id will be used for resource releasing.
+      linux-amd64-ec2-runner-label: ${{ steps.start-linux-amd64-runner.outputs.label }}
+      linux-amd64-ec2-runner-instance-id: ${{ steps.start-linux-amd64-runner.outputs.ec2-instance-id }}
+      linux-arm64-ec2-runner-label: ${{ steps.start-linux-arm64-runner.outputs.label }}
+      linux-arm64-ec2-runner-instance-id: ${{ steps.start-linux-arm64-runner.outputs.ec2-instance-id }}
+
+      # The 'version' is used as the global tag name of the release workflow.
+      version: ${{ steps.create-version.outputs.version }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      # The create-version will create a global variable named 'version' in the global workflows.
+      # - If it's a tag push release, the version is the tag name(${{ github.ref_name }});
+      # - If it's a scheduled release, the version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-$buildTime', like v0.2.0-nightly-20230313;
+      # - If it's a manual release, the version is '${{ env.NEXT_RELEASE_VERSION }}-<short-git-sha>-YYYYMMDDSS', like v0.2.0-e5b243c-2023071245;
+      - name: Create version
+        id: create-version
+        run: |
+          echo "version=$(./.github/scripts/create-version.sh)" >> $GITHUB_OUTPUT
+        env:
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
+          GITHUB_REF_NAME: ${{ github.ref_name }}
+          NEXT_RELEASE_VERSION: ${{ env.NEXT_RELEASE_VERSION }}
+          NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }}
+
+      - name: Allocate linux-amd64 runner
+        if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
+        uses: ./.github/actions/start-runner
+        id: start-linux-amd64-runner
+        with:
+          runner: ${{ inputs.linux_amd64_runner || vars.DEFAULT_AMD64_RUNNER }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.EC2_RUNNER_REGION }}
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          image-id: ${{ vars.EC2_RUNNER_LINUX_AMD64_IMAGE_ID }}
+          security-group-id: ${{ vars.EC2_RUNNER_SECURITY_GROUP_ID }}
+          subnet-id: ${{ vars.EC2_RUNNER_SUBNET_ID }}
+
+      - name: Allocate linux-arm64 runner
+        if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
+        uses: ./.github/actions/start-runner
+        id: start-linux-arm64-runner
+        with:
+          runner: ${{ inputs.linux_arm64_runner || vars.DEFAULT_ARM64_RUNNER }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.EC2_RUNNER_REGION }}
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          image-id: ${{ vars.EC2_RUNNER_LINUX_ARM64_IMAGE_ID }}
+          security-group-id: ${{ vars.EC2_RUNNER_SECURITY_GROUP_ID }}
+          subnet-id: ${{ vars.EC2_RUNNER_SUBNET_ID }}
+
+  build-linux-amd64-artifacts:
+    name: Build linux-amd64 artifacts
+    if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
+    needs: [
+      allocate-runners,
+    ]
+    runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - uses: ./.github/actions/build-linux-artifacts
+        with:
+          arch: amd64
+          cargo-profile: ${{ env.CARGO_PROFILE }}
+          version: ${{ needs.allocate-runners.outputs.version }}
+          disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
+          release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
+          aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
+
+  build-linux-arm64-artifacts:
+    name: Build linux-arm64 artifacts
+    if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
+    needs: [
+      allocate-runners,
+    ]
+    runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - uses: ./.github/actions/build-linux-artifacts
+        with:
+          arch: arm64
+          cargo-profile: ${{ env.CARGO_PROFILE }}
+          version: ${{ needs.allocate-runners.outputs.version }}
+          disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
+          release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
+          aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
+
+  build-macos-artifacts:
+    name: Build macOS artifacts
    strategy:
+      fail-fast: false
      matrix:
-        # The file format is greptime-<os>-<arch>
        include:
-          - arch: x86_64-unknown-linux-gnu
-            os: ubuntu-2004-16-cores
-            file: greptime-linux-amd64
-            continue-on-error: false
-          - arch: aarch64-unknown-linux-gnu
-            os: ubuntu-2004-16-cores
-            file: greptime-linux-arm64
-            continue-on-error: true
-          - arch: aarch64-apple-darwin
-            os: macos-latest
-            file: greptime-darwin-arm64
-            continue-on-error: true
-          - arch: x86_64-apple-darwin
-            os: macos-latest
-            file: greptime-darwin-amd64
-            continue-on-error: true
+          - os: ${{ needs.allocate-runners.outputs.macos-runner }}
+            arch: aarch64-apple-darwin
+            features: servers/dashboard
+            artifacts-dir-prefix: greptime-darwin-arm64
+          - os: ${{ needs.allocate-runners.outputs.macos-runner }}
+            arch: aarch64-apple-darwin
+            features: pyo3_backend,servers/dashboard
+            artifacts-dir-prefix: greptime-darwin-arm64-pyo3
+          - os: ${{ needs.allocate-runners.outputs.macos-runner }}
+            features: servers/dashboard
+            arch: x86_64-apple-darwin
+            artifacts-dir-prefix: greptime-darwin-amd64
+          - os: ${{ needs.allocate-runners.outputs.macos-runner }}
+            features: pyo3_backend,servers/dashboard
+            arch: x86_64-apple-darwin
+            artifacts-dir-prefix: greptime-darwin-amd64-pyo3
    runs-on: ${{ matrix.os }}
-    continue-on-error: ${{ matrix.continue-on-error }}
-    if: github.repository == 'GreptimeTeam/greptimedb'
+    needs: [
+      allocate-runners,
+    ]
+    if: ${{ inputs.build_macos_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
    steps:
-      - name: Checkout sources
-        uses: actions/checkout@v3
-
-      - name: Cache cargo assets
-        id: cache
-        uses: actions/cache@v3
+      - uses: actions/checkout@v3
        with:
-          path: |
-            ~/.cargo/bin/
-            ~/.cargo/registry/index/
-            ~/.cargo/registry/cache/
-            ~/.cargo/git/db/
-            target/
-          key: ${{ matrix.arch }}-build-cargo-${{ hashFiles('**/Cargo.lock') }}
+          fetch-depth: 0

-      - name: Install Protoc for linux
-        if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
-        run: | # Make sure the protoc is >= 3.15
-          wget https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-x86_64.zip
-          unzip protoc-21.9-linux-x86_64.zip -d protoc
-          sudo cp protoc/bin/protoc /usr/local/bin/
-          sudo cp -r protoc/include/google /usr/local/include/
-
-      - name: Install Protoc for macos
-        if: contains(matrix.arch, 'darwin')
-        run: |
-          brew install protobuf
-
-      - name: Install etcd for linux
-        if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
-        run: |
-          ETCD_VER=v3.5.7
-          DOWNLOAD_URL=https://github.com/etcd-io/etcd/releases/download
-          curl -L ${DOWNLOAD_URL}/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz -o /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
-          mkdir -p /tmp/etcd-download
-          tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /tmp/etcd-download --strip-components=1
-          rm -f /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
-
-          sudo cp -a /tmp/etcd-download/etcd* /usr/local/bin/
-          nohup etcd >/tmp/etcd.log 2>&1 &
-
-      - name: Install etcd for macos
-        if: contains(matrix.arch, 'darwin')
-        run: |
-          brew install etcd
-          brew services start etcd
-
-      - name: Install dependencies for linux
-        if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
-        run: |
-          sudo apt-get -y update
-          sudo apt-get -y install libssl-dev pkg-config g++-aarch64-linux-gnu gcc-aarch64-linux-gnu binutils-aarch64-linux-gnu wget
-
-      - name: Compile Python 3.10.10 from source for Aarch64
-        if: contains(matrix.arch, 'aarch64-unknown-linux-gnu')
-        run: |
-          sudo chmod +x ./docker/aarch64/compile-python.sh
-          sudo ./docker/aarch64/compile-python.sh
-          export PYO3_CROSS_LIB_DIR=${PWD}/python310-aarch64/lib
-          echo $PYO3_CROSS_LIB_DIR
-
-      - name: Install rust toolchain
-        uses: dtolnay/rust-toolchain@master
+      - uses: ./.github/actions/build-macos-artifacts
        with:
-          toolchain: ${{ env.RUST_TOOLCHAIN }}
-          targets: ${{ matrix.arch }}
+          arch: ${{ matrix.arch }}
+          rust-toolchain: ${{ env.RUST_TOOLCHAIN }}
+          cargo-profile: ${{ env.CARGO_PROFILE }}
+          features: ${{ matrix.features }}
+          version: ${{ needs.allocate-runners.outputs.version }}
+          disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
+          release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
+          artifacts-dir: ${{ matrix.artifacts-dir-prefix }}-${{ needs.allocate-runners.outputs.version }}
+          aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}

-      - name: Output package versions
-        run: protoc --version ; cargo version ; rustc --version ; gcc --version ; g++ --version
-
-      - name: Run tests
-        run: make unit-test integration-test sqlness-test
-
-      - name: Run cargo build
-        run: cargo build ${{ matrix.opts }} --profile ${{ env.CARGO_PROFILE }} --locked --target ${{ matrix.arch }}
-
-      - name: Calculate checksum and rename binary
-        shell: bash
-        run: |
-          cd target/${{ matrix.arch }}/${{ env.CARGO_PROFILE }}
-          chmod +x greptime
-          tar -zcvf ${{ matrix.file }}.tgz greptime
-          echo $(shasum -a 256 ${{ matrix.file }}.tgz | cut -f1 -d' ') > ${{ matrix.file }}.sha256sum
-
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v3
+  release-images-to-dockerhub:
+    name: Build and push images to DockerHub
+    if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }}
+    needs: [
+      allocate-runners,
+      build-linux-amd64-artifacts,
+      build-linux-arm64-artifacts,
+    ]
+    runs-on: ubuntu-2004-16-cores
+    steps:
+      - uses: actions/checkout@v3
        with:
-          name: ${{ matrix.file }}
-          path: target/${{ matrix.arch }}/${{ env.CARGO_PROFILE }}/${{ matrix.file }}.tgz
+          fetch-depth: 0

-      - name: Upload checksum of artifacts
-        uses: actions/upload-artifact@v3
+      - name: Build and push images to dockerhub
+        uses: ./.github/actions/build-images
        with:
-          name: ${{ matrix.file }}.sha256sum
-          path: target/${{ matrix.arch }}/${{ env.CARGO_PROFILE }}/${{ matrix.file }}.sha256sum
-  release:
-    name: Release artifacts
-    needs: [build]
+          image-registry: docker.io
+          image-namespace: ${{ vars.IMAGE_NAMESPACE }}
+          image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
+          image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
+          version: ${{ needs.allocate-runners.outputs.version }}
+
+  release-images-to-acr:
+    name: Build and push images to ACR
+    if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }}
+    needs: [
+      allocate-runners,
+      build-linux-amd64-artifacts,
+      build-linux-arm64-artifacts,
+    ]
+    runs-on: ubuntu-2004-16-cores
+    # When we push to ACR, it's easy to fail due to some unknown network issues.
+    # However, we don't want to fail the whole workflow because of this.
+    # The ACR has a daily sync with DockerHub, so don't worry about the image not being updated.
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Build and push images to ACR
+        uses: ./.github/actions/build-images
+        with:
+          image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
+          image-namespace: ${{ vars.IMAGE_NAMESPACE }}
+          image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
+          image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
+          version: ${{ needs.allocate-runners.outputs.version }}
+
+  release-artifacts:
+    name: Create GitHub release and upload artifacts
+    if: ${{ inputs.release_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
+    needs: [
+      allocate-runners,
+      build-linux-amd64-artifacts,
+      build-linux-arm64-artifacts,
+      build-macos-artifacts,
+      release-images-to-dockerhub,
+    ]
    runs-on: ubuntu-latest
-    if: github.repository == 'GreptimeTeam/greptimedb'
    steps:
-      - name: Checkout sources
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Release artifacts
+        uses: ./.github/actions/release-artifacts
+        with:
+          version: ${{ needs.allocate-runners.outputs.version }}
+
+  release-dev-builder-image:
+    name: Release dev builder image
+    if: ${{ inputs.release_dev_builder_image }} # Only manually trigger this job.
+    runs-on: ubuntu-latest-16-cores
+    steps:
+      - name: Checkout
        uses: actions/checkout@v3
-
-      - name: Download artifacts
-        uses: actions/download-artifact@v3
-
-      - name: Configure scheduled build version # the version would be ${SCHEDULED_BUILD_VERSION_PREFIX}-${SCHEDULED_PERIOD}-YYYYMMDD, like v0.2.0-nigthly-20230313.
-        shell: bash
-        if: github.event_name == 'schedule'
-        run: |
-          buildTime=`date "+%Y%m%d"`
-          SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-${{ env.SCHEDULED_PERIOD }}-$buildTime
-          echo "SCHEDULED_BUILD_VERSION=${SCHEDULED_BUILD_VERSION}" >> $GITHUB_ENV
-
-      - name: Create scheduled build git tag
-        if: github.event_name == 'schedule'
-        run: |
-          git tag ${{ env.SCHEDULED_BUILD_VERSION }}
-
-      - name: Publish scheduled release # configure the different release title and tags.
-        uses: softprops/action-gh-release@v1
-        if: github.event_name == 'schedule'
        with:
-          name: "Release ${{ env.SCHEDULED_BUILD_VERSION }}"
-          tag_name: ${{ env.SCHEDULED_BUILD_VERSION }}
-          generate_release_notes: true
-          files: |
-            **/greptime-*
+          fetch-depth: 0

-      - name: Publish release
-        uses: softprops/action-gh-release@v1
-        if: github.event_name != 'schedule'
+      - name: Build and push dev builder image
+        uses: ./.github/actions/build-dev-builder-image
        with:
-          name: "Release ${{ github.ref_name }}"
-          files: |
-            **/greptime-*
+          dockerhub-image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
+          dockerhub-image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
+          acr-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
+          acr-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
+          acr-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}

-  docker:
-    name: Build docker image
-    needs: [build]
+  ### Stop runners ###
+  # It's very necessary to split the job of releasing runners into 'stop-linux-amd64-runner' and 'stop-linux-arm64-runner'.
+  # Because we can terminate the specified EC2 instance immediately after the job is finished without unnecessary waiting.
+  stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
+    name: Stop linux-amd64 runner
+    # Only run this job when the runner is allocated.
+    if: ${{ always() }}
    runs-on: ubuntu-latest
-    if: github.repository == 'GreptimeTeam/greptimedb'
+    needs: [
+      allocate-runners,
+      build-linux-amd64-artifacts,
+    ]
    steps:
-      - name: Checkout sources
+      - name: Checkout
        uses: actions/checkout@v3
-
-      - name: Login to UCloud Container Registry
-        uses: docker/login-action@v2
        with:
-          registry: uhub.service.ucloud.cn
-          username: ${{ secrets.UCLOUD_USERNAME }}
-          password: ${{ secrets.UCLOUD_PASSWORD }}
+          fetch-depth: 0

-      - name: Login to Dockerhub
-        uses: docker/login-action@v2
+      - name: Stop EC2 runner
+        uses: ./.github/actions/stop-runner
        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
+          label: ${{ needs.allocate-runners.outputs.linux-amd64-ec2-runner-label }}
+          ec2-instance-id: ${{ needs.allocate-runners.outputs.linux-amd64-ec2-runner-instance-id }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.EC2_RUNNER_REGION }}
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}

-      - name: Configure scheduled build image tag # the tag would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}
-        shell: bash
-        if: github.event_name == 'schedule'
-        run: |
-          buildTime=`date "+%Y%m%d"`
-          SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
-          echo "IMAGE_TAG=${SCHEDULED_BUILD_VERSION:1}" >> $GITHUB_ENV
-
-      - name: Configure tag # If the release tag is v0.1.0, then the image version tag will be 0.1.0.
-        shell: bash
-        if: github.event_name != 'schedule'
-        run: |
-          VERSION=${{ github.ref_name }}
-          echo "IMAGE_TAG=${VERSION:1}" >> $GITHUB_ENV
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
-
-      - name: Set up buildx
-        uses: docker/setup-buildx-action@v2
-
-      - name: Download amd64 binary
-        uses: actions/download-artifact@v3
+  stop-linux-arm64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
+    name: Stop linux-arm64 runner
+    # Only run this job when the runner is allocated.
+    if: ${{ always() }}
+    runs-on: ubuntu-latest
+    needs: [
+      allocate-runners,
+      build-linux-arm64-artifacts,
+    ]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
        with:
-          name: greptime-linux-amd64
-          path: amd64
+          fetch-depth: 0

-      - name: Unzip the amd64 artifacts
-        run: |
-          cd amd64
-          tar xvf greptime-linux-amd64.tgz
-          rm greptime-linux-amd64.tgz
-
-      - name: Download arm64 binary
-        id: download-arm64
-        uses: actions/download-artifact@v3
+      - name: Stop EC2 runner
+        uses: ./.github/actions/stop-runner
        with:
-          name: greptime-linux-arm64
-          path: arm64
-
-      - name: Unzip the arm64 artifacts
-        id: unzip-arm64
-        if: success() || steps.download-arm64.conclusion == 'success'
-        run: |
-          cd arm64
-          tar xvf greptime-linux-arm64.tgz
-          rm greptime-linux-arm64.tgz
-
-      - name: Build and push all
-        uses: docker/build-push-action@v3
-        if: success() || steps.unzip-arm64.conclusion == 'success' # Build and push all platform if unzip-arm64 succeeds
-        with:
-          context: .
-          file: ./docker/ci/Dockerfile
-          push: true
-          platforms: linux/amd64,linux/arm64
-          tags: |
-            greptime/greptimedb:latest
-            greptime/greptimedb:${{ env.IMAGE_TAG }}
-            uhub.service.ucloud.cn/greptime/greptimedb:latest
-            uhub.service.ucloud.cn/greptime/greptimedb:${{ env.IMAGE_TAG }}
-
-      - name: Build and push amd64 only
-        uses: docker/build-push-action@v3
-        if: success() || steps.download-arm64.conclusion == 'failure' # Only build and push amd64 platform if download-arm64 fails
-        with:
-          context: .
-          file: ./docker/ci/Dockerfile
-          push: true
-          platforms: linux/amd64
-          tags: |
-            greptime/greptimedb:latest
-            greptime/greptimedb:${{ env.IMAGE_TAG }}
-            uhub.service.ucloud.cn/greptime/greptimedb:latest
-            uhub.service.ucloud.cn/greptime/greptimedb:${{ env.IMAGE_TAG }}
+          label: ${{ needs.allocate-runners.outputs.linux-arm64-ec2-runner-label }}
+          ec2-instance-id: ${{ needs.allocate-runners.outputs.linux-arm64-ec2-runner-instance-id }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.EC2_RUNNER_REGION }}
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
 # Generated by Cargo
 # will have compiled files and executables
 /target/
+# also ignore if it's a symbolic link
+/target

 # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
 # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
@@ -35,3 +37,12 @@ benchmarks/data

 # dotenv
 .env
+
+# dashboard files
+!/src/servers/dashboard/VERSION
+/src/servers/dashboard/*
+
+# Vscode workspace
+*.code-workspace
+
+venv/
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -51,7 +51,7 @@ GreptimeDB uses the [Apache 2.0 license](https://github.com/GreptimeTeam/greptim
 - To ensure that community is free and confident in its ability to use your contributions, please sign the Contributor License Agreement (CLA) which will be incorporated in the pull request process.
 - Make sure all your codes are formatted and follow the [coding style](https://pingcap.github.io/style-guide/rust/).
 - Make sure all unit tests are passed (using `cargo test --workspace` or [nextest](https://nexte.st/index.html) `cargo nextest run`).
- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr`).
+- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings`).

 #### `pre-commit` Hooks

@@ -107,6 +107,6 @@ The core team will be thrilled if you participate in any way you like. When you

 Also, see some extra GreptimeDB content:

- [GreptimeDB Docs](https://greptime.com/docs)
- [Learn GreptimeDB](https://greptime.com/products/db)
+- [GreptimeDB Docs](https://docs.greptime.com/)
+- [Learn GreptimeDB](https://greptime.com/product/db)
 - [Greptime Inc. Website](https://greptime.com)
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,13 +7,17 @@ members = [
    "src/cmd",
    "src/common/base",
    "src/common/catalog",
+    "src/common/datasource",
    "src/common/error",
    "src/common/function",
    "src/common/function-macro",
    "src/common/grpc",
    "src/common/grpc-expr",
    "src/common/mem-prof",
+    "src/common/meta",
    "src/common/procedure",
+    "src/common/procedure-test",
+    "src/common/pprof",
    "src/common/query",
    "src/common/recordbatch",
    "src/common/runtime",
@@ -23,11 +27,13 @@ members = [
    "src/common/time",
    "src/datanode",
    "src/datatypes",
+    "src/file-table-engine",
    "src/frontend",
    "src/log-store",
    "src/meta-client",
    "src/meta-srv",
    "src/mito",
+    "src/mito2",
    "src/object-store",
    "src/partition",
    "src/promql",
@@ -45,38 +51,55 @@ members = [
 ]

 [workspace.package]
-version = "0.1.1"
+version = "0.3.2"
 edition = "2021"
 license = "Apache-2.0"

 [workspace.dependencies]
-arrow = { version = "33.0" }
-arrow-array = "33.0"
-arrow-flight = "33.0"
-arrow-schema = { version = "33.0", features = ["serde"] }
+arrow = { version = "43.0" }
+etcd-client = "0.11"
+arrow-array = "43.0"
+arrow-flight = "43.0"
+arrow-schema = { version = "43.0", features = ["serde"] }
 async-stream = "0.3"
 async-trait = "0.1"
 chrono = { version = "0.4", features = ["serde"] }
-datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
-datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
-datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
-datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
-datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
-datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
+datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
+datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
+datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
+datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
+datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
+datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
+datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
+derive_builder = "0.12"
 futures = "0.3"
 futures-util = "0.3"
-parquet = "33.0"
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "eeae2d0dfa8ee320a7b9e987b4631a6c1c732ebd" }
+itertools = "0.10"
+lazy_static = "1.4"
+once_cell = "1.18"
+opentelemetry-proto = { version = "0.2", features = ["gen-tonic", "metrics"] }
+parquet = "43.0"
 paste = "1.0"
 prost = "0.11"
+rand = "0.8"
+regex = "1.8"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 snafu = { version = "0.7", features = ["backtraces"] }
-sqlparser = "0.30"
+sqlparser = "0.35"
 tempfile = "3"
-tokio = { version = "1.24.2", features = ["full"] }
-tokio-util = "0.7"
-tonic = { version = "0.8", features = ["tls"] }
+tokio = { version = "1.28", features = ["full"] }
+tokio-util = { version = "0.7", features = ["io-util", "compat"] }
+toml = "0.7"
+tonic = { version = "0.9", features = ["tls"] }
 uuid = { version = "1", features = ["serde", "v4", "fast-rng"] }
+metrics = "0.20"
+meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" }
+
+[workspace.dependencies.meter-macros]
+git = "https://github.com/GreptimeTeam/greptime-meter.git"
+rev = "abbd357c1e193cd270ea65ee7652334a150b628f"

 [profile.release]
 debug = true
--- a/Cross.toml
+++ b/Cross.toml
@@ -0,0 +1,7 @@
+[build]
+pre-build = [
+    "dpkg --add-architecture $CROSS_DEB_ARCH",
+    "apt update && apt install -y unzip zlib1g-dev zlib1g-dev:$CROSS_DEB_ARCH",
+    "curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip && unzip protoc-3.15.8-linux-x86_64.zip -d /usr/",
+    "chmod a+x /usr/bin/protoc && chmod -R a+rx /usr/include/google",
+]
--- a/139
+++ b/139
@@ -1,15 +1,77 @@
-IMAGE_REGISTRY ?= greptimedb
+# The arguments for building images.
+CARGO_PROFILE ?=
+FEATURES ?=
+TARGET_DIR ?=
+TARGET ?=
+CARGO_BUILD_OPTS := --locked
+IMAGE_REGISTRY ?= docker.io
+IMAGE_NAMESPACE ?= greptime
 IMAGE_TAG ?= latest
+BUILDX_MULTI_PLATFORM_BUILD ?= false
+BUILDX_BUILDER_NAME ?= gtbuilder
+BASE_IMAGE ?= ubuntu
+RUST_TOOLCHAIN ?= $(shell cat rust-toolchain.toml | grep channel | cut -d'"' -f2)
+CARGO_REGISTRY_CACHE ?= ${HOME}/.cargo/registry
+
+# The arguments for running integration tests.
+ETCD_VERSION ?= v3.5.9
+ETCD_IMAGE ?= quay.io/coreos/etcd:${ETCD_VERSION}
+RETRY_COUNT ?= 3
+NEXTEST_OPTS := --retries ${RETRY_COUNT}
+BUILD_JOBS ?= $(shell which nproc 1>/dev/null && expr $$(nproc) / 2) # If nproc is not available, we don't set the build jobs.
+ifeq ($(BUILD_JOBS), 0) # If the number of cores is less than 2, set the build jobs to 1.
+  BUILD_JOBS := 1
+endif
+
+ifneq ($(strip $(BUILD_JOBS)),)
+	NEXTEST_OPTS += --build-jobs=${BUILD_JOBS}
+endif
+
+ifneq ($(strip $(CARGO_PROFILE)),)
+	CARGO_BUILD_OPTS += --profile ${CARGO_PROFILE}
+endif
+
+ifneq ($(strip $(FEATURES)),)
+	CARGO_BUILD_OPTS += --features ${FEATURES}
+endif
+
+ifneq ($(strip $(TARGET_DIR)),)
+	CARGO_BUILD_OPTS += --target-dir ${TARGET_DIR}
+endif
+
+ifneq ($(strip $(TARGET)),)
+	CARGO_BUILD_OPTS += --target ${TARGET}
+endif
+
+ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), true)
+	BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64,linux/arm64 --push
+else
+	BUILDX_MULTI_PLATFORM_BUILD_OPTS := -o type=docker
+endif

 ##@ Build

 .PHONY: build
-build: ## Build debug version greptime.
-	cargo build
+build: ## Build debug version greptime. If USE_DEV_BUILDER is true, the binary will be built in dev-builder.
+ifeq ($(USE_DEV_BUILDER), true)
+	docker run --network=host \
+	-v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry \
+	-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder:latest \
+	make build CARGO_PROFILE=${CARGO_PROFILE} FEATURES=${FEATURES} TARGET_DIR=${TARGET_DIR}
+else
+	cargo build ${CARGO_BUILD_OPTS}
+endif

 .PHONY: release
-release:  ## Build release version greptime.
-	cargo build --release
+release:  ## Build release version greptime. If USE_DEV_BUILDER is true, the binary will be built in dev-builder.
+ifeq ($(USE_DEV_BUILDER), true)
+	docker run --network=host \
+	-v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry \
+	-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder:latest \
+	make release CARGO_PROFILE=${CARGO_PROFILE} FEATURES=${FEATURES} TARGET_DIR=${TARGET_DIR}
+else
+	cargo build --release ${CARGO_BUILD_OPTS}
+endif

 .PHONY: clean
 clean: ## Clean the project.
@@ -21,21 +83,49 @@ fmt: ## Format all the Rust code.

 .PHONY: fmt-toml
 fmt-toml: ## Format all TOML files.
-	taplo format --check --option "indent_string=    "
+	taplo format
+
+.PHONY: check-toml
+check-toml: ## Check all TOML files.
+	taplo format --check

 .PHONY: docker-image
-docker-image: ## Build docker image.
-	docker build --network host -f docker/Dockerfile -t ${IMAGE_REGISTRY}:${IMAGE_TAG} .
+docker-image: multi-platform-buildx ## Build docker image.
+	docker buildx build --builder ${BUILDX_BUILDER_NAME} \
+	  --build-arg="CARGO_PROFILE=${CARGO_PROFILE}" --build-arg="FEATURES=${FEATURES}" \
+	  -f docker/${BASE_IMAGE}/Dockerfile \
+	  -t ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/greptimedb:${IMAGE_TAG} ${BUILDX_MULTI_PLATFORM_BUILD_OPTS} .
+
+.PHONY: build-greptime-by-buildx
+build-greptime-by-buildx: multi-platform-buildx ## Build greptime binary by docker buildx. The binary will be copied to the current directory.
+	docker buildx build --builder ${BUILDX_BUILDER_NAME} \
+	  --target=builder \
+	  --build-arg="CARGO_PROFILE=${CARGO_PROFILE}" --build-arg="FEATURES=${FEATURES}" \
+	  -f docker/${BASE_IMAGE}/Dockerfile \
+	  -t ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/greptimedb-builder:${IMAGE_TAG} ${BUILDX_MULTI_PLATFORM_BUILD_OPTS} .
+
+	docker run --rm -v ${PWD}:/data \
+      --entrypoint cp ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/greptimedb-builder:${IMAGE_TAG} \
+      /out/target/${CARGO_PROFILE}/greptime /data/greptime
+
+.PHONY: dev-builder
+dev-builder: multi-platform-buildx ## Build dev-builder image.
+	docker buildx build --builder ${BUILDX_BUILDER_NAME} \
+	--build-arg="RUST_TOOLCHAIN=${RUST_TOOLCHAIN}" \
+	-f docker/dev-builder/Dockerfile \
+	-t ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder:${IMAGE_TAG} ${BUILDX_MULTI_PLATFORM_BUILD_OPTS} .
+
+.PHONY: multi-platform-buildx
+multi-platform-buildx: ## Create buildx multi-platform builder.
+	docker buildx inspect ${BUILDX_BUILDER_NAME} || docker buildx create --name ${BUILDX_BUILDER_NAME} --driver docker-container --bootstrap --use

 ##@ Test
+test: nextest ## Run unit and integration tests.
+	cargo nextest run ${NEXTEST_OPTS}

-.PHONY: unit-test
-unit-test: ## Run unit test.
-	cargo test --workspace
-
-.PHONY: integration-test
-integration-test: ## Run integation test.
-	cargo test integration
+.PHONY: nextest ## Install nextest tools.
+nextest:
+	cargo --list | grep nextest || cargo install cargo-nextest --locked

 .PHONY: sqlness-test
 sqlness-test: ## Run sqlness test.
@@ -47,12 +137,27 @@ check: ## Cargo check all the targets.

 .PHONY: clippy
 clippy: ## Check clippy rules.
-	cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr
+	cargo clippy --workspace --all-targets -- -D warnings

 .PHONY: fmt-check
 fmt-check: ## Check code format.
 	cargo fmt --all -- --check

+.PHONY: start-etcd
+start-etcd: ## Start single node etcd for testing purpose.
+	docker run --rm -d --network=host -p 2379-2380:2379-2380 ${ETCD_IMAGE}
+
+.PHONY: stop-etcd
+stop-etcd: ## Stop single node etcd for testing purpose.
+	docker stop $$(docker ps -q --filter ancestor=${ETCD_IMAGE})
+
+.PHONY: run-it-in-container
+run-it-in-container: start-etcd ## Run integration tests in dev-builder.
+	docker run --network=host \
+	-v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry -v /tmp:/tmp \
+	-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder:latest \
+	make test sqlness-test BUILD_JOBS=${BUILD_JOBS}
+
 ##@ General

 # The help target prints out all targets with their descriptions organized
@@ -68,4 +173,4 @@ fmt-check: ## Check code format.

 .PHONY: help
 help: ## Display help messages.
-	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
+	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-30s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
--- a/README.md
+++ b/README.md
@@ -1,14 +1,14 @@
 <p align="center">
  <picture>
-    <source media="(prefers-color-scheme: light)" srcset="/docs/logo-text-padding.png">
-    <source media="(prefers-color-scheme: dark)" srcset="/docs/logo-text-padding-dark.png">
-    <img alt="GreptimeDB Logo" src="/docs/logo-text-padding.png" width="400px">
+    <source media="(prefers-color-scheme: light)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png">
+    <source media="(prefers-color-scheme: dark)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding-dark.png">
+    <img alt="GreptimeDB Logo" src="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png" width="400px">
  </picture>
 </p>


 <h3 align="center">
-    The next-generation hybrid timeseries/analytics processing database in the cloud
+    The next-generation hybrid time-series/analytics processing database in the cloud
 </h3>

 <p align="center">
@@ -23,6 +23,8 @@
    <a href="https://twitter.com/greptime"><img src="https://img.shields.io/badge/twitter-follow_us-1d9bf0.svg"></a>
    &nbsp;
    <a href="https://www.linkedin.com/company/greptime/"><img src="https://img.shields.io/badge/linkedin-connect_with_us-0a66c2.svg"></a>
+    &nbsp;
+    <a href="https://greptime.com/slack"><img src="https://img.shields.io/badge/slack-GreptimeDB-0abd59?logo=slack" alt="slack" /></a>
 </p>

 ## What is GreptimeDB
@@ -36,15 +38,19 @@ Our core developers have been building time-series data platform
 for years. Based on their best-practices, GreptimeDB is born to give you:

 - A standalone binary that scales to highly-available distributed cluster, providing a transparent experience for cluster users
- Optimized columnar layout for handling time-series data; compacted, compressed, stored on various storage backends
- Flexible index options, tackling high cardinality issues down
+- Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends
+- Flexible indexes, tackling high cardinality issues down
 - Distributed, parallel query execution, leveraging elastic computing resource
 - Native SQL, and Python scripting for advanced analytical scenarios
- Widely adopted database protocols and APIs
+- Widely adopted database protocols and APIs, with native PromQL support
 - Extensible table engine architecture for extensive workloads

 ## Quick Start

+### [GreptimePlay](https://greptime.com/playground)
+
+Try out the features of GreptimeDB right from your browser.
+
 ### Build

 #### Build from Source
@@ -61,12 +67,12 @@ To compile GreptimeDB from source, you'll need:
  find an installation instructions [here](https://grpc.io/docs/protoc-installation/).
  **Note that `protoc` version needs to be >= 3.15** because we have used the `optional`
  keyword. You can check it with `protoc --version`.
- python3-dev or python3-devel(Optional, only needed if you want to run scripts
-  in cpython): this install a Python shared library required for running python
+- python3-dev or python3-devel(Optional feature, only needed if you want to run scripts
+  in CPython; you also need to enable the `pyo3_backend` feature when compiling (by `cargo run -F pyo3_backend` or by adding `pyo3_backend` to `features.default` in src/script/Cargo.toml, like `default = ["python", "pyo3_backend"]`)): this installs a Python shared library required for running the Python
  scripting engine(In CPython Mode). This is available as `python3-dev` on
  ubuntu, you can install it with `sudo apt install python3-dev`, or
  `python3-devel` on RPM based distributions (e.g. Fedora, Red Hat, SuSE). Mac's
-  `Python3` package should have this shared library by default.
+  `Python3` package should have this shared library by default. More details on compiling with PyO3 can be found in [PyO3](https://pyo3.rs/v0.18.1/building_and_distribution#configuring-the-python-version)'s documentation.

 #### Build with Docker

@@ -90,82 +96,45 @@ Or if you built from docker:
 docker run -p 4002:4002 -v "$(pwd):/tmp/greptimedb" greptime/greptimedb standalone start
 ```

-For more startup options, greptimedb's **distributed mode** and information
-about Kubernetes deployment, check our [docs](https://docs.greptime.com/).
+Please see [the online document site](https://docs.greptime.com/getting-started/overview#install-greptimedb) for more installation options and [operations info](https://docs.greptime.com/user-guide/operations/overview).

-### Connect
+### Get started

-1. Connect to GreptimeDB via standard [MySQL
-   client](https://dev.mysql.com/downloads/mysql/):
+Read the [complete getting started guide](https://docs.greptime.com/getting-started/overview#connect) on our [official document site](https://docs.greptime.com/).

-   ```
-   # The standalone instance listen on port 4002 by default.
-   mysql -h 127.0.0.1 -P 4002
-   ```
-
-2. Create table:
-
-   ```SQL
-   CREATE TABLE monitor (
-     host STRING,
-     ts TIMESTAMP,
-     cpu DOUBLE DEFAULT 0,
-     memory DOUBLE,
-     TIME INDEX (ts),
-     PRIMARY KEY(host)) ENGINE=mito WITH(regions=1);
-   ```
-
-3. Insert some data:
-
-   ```SQL
-   INSERT INTO monitor(host, cpu, memory, ts) VALUES ('host1', 66.6, 1024, 1660897955000);
-   INSERT INTO monitor(host, cpu, memory, ts) VALUES ('host2', 77.7, 2048, 1660897956000);
-   INSERT INTO monitor(host, cpu, memory, ts) VALUES ('host3', 88.8, 4096, 1660897957000);
-   ```
-
-4. Query the data:
-
-   ```SQL
-   SELECT * FROM monitor;
-   ```
-
-   ```TEXT
-   +-------+---------------------+------+--------+
-   | host  | ts                  | cpu  | memory |
-   +-------+---------------------+------+--------+
-   | host1 | 2022-08-19 08:32:35 | 66.6 |   1024 |
-   | host2 | 2022-08-19 08:32:36 | 77.7 |   2048 |
-   | host3 | 2022-08-19 08:32:37 | 88.8 |   4096 |
-   +-------+---------------------+------+--------+
-   3 rows in set (0.01 sec)
-   ```
-
-You can always cleanup test database by removing `/tmp/greptimedb`.
+To write and query data, GreptimeDB is compatible with multiple [protocols and clients](https://docs.greptime.com/user-guide/clients/overview).

 ## Resources

 ### Installation

- [Pre-built Binaries](https://github.com/GreptimeTeam/greptimedb/releases):
-  downloadable pre-built binaries for Linux and MacOS
- [Docker Images](https://hub.docker.com/r/greptime/greptimedb): pre-built
-  Docker images
+- [Pre-built Binaries](https://greptime.com/download):
+  For Linux and macOS, you can easily download pre-built binaries including official releases and nightly builds that are ready to use. 
+  In most cases, downloading the version without PyO3 is sufficient. However, if you plan to run scripts in CPython (and use Python packages like NumPy and Pandas), you will need to download the version with PyO3 and install a Python with the same version as the Python in the PyO3 version.
+  We recommend using virtualenv for the installation process to manage multiple Python versions.
+- [Docker Images](https://hub.docker.com/r/greptime/greptimedb)(**recommended**): pre-built
+  Docker images, this is the easiest way to try GreptimeDB. By default it runs CPython script with `pyo3_backend` enabled.
 - [`gtctl`](https://github.com/GreptimeTeam/gtctl): the command-line tool for
  Kubernetes deployment

 ### Documentation

- GreptimeDB [User Guide](https://docs.greptime.com/user-guide/concepts.html)
+- GreptimeDB [User Guide](https://docs.greptime.com/user-guide/concepts/overview)
 - GreptimeDB [Developer
  Guide](https://docs.greptime.com/developer-guide/overview.html)
+- GreptimeDB [internal code document](https://greptimedb.rs)

 ### Dashboard
 - [The dashboard UI for GreptimeDB](https://github.com/GreptimeTeam/dashboard)

 ### SDK

- [GreptimeDB Java
-  Client](https://github.com/GreptimeTeam/greptimedb-client-java)
+- [GreptimeDB C++ Client](https://github.com/GreptimeTeam/greptimedb-client-cpp)
+- [GreptimeDB Erlang Client](https://github.com/GreptimeTeam/greptimedb-client-erl)
+- [GreptimeDB Go Client](https://github.com/GreptimeTeam/greptimedb-client-go)
+- [GreptimeDB Java Client](https://github.com/GreptimeTeam/greptimedb-client-java)
+- [GreptimeDB Python Client](https://github.com/GreptimeTeam/greptimedb-client-py) (WIP)
+- [GreptimeDB Rust Client](https://github.com/GreptimeTeam/greptimedb-client-rust)

 ## Project Status

--- a/SECURITY.md
+++ b/SECURITY.md
@@ -0,0 +1,19 @@
+# Security Policy
+
+## Supported Versions
+
+| Version | Supported          |
+| ------- | ------------------ |
+| >= v0.1.0 | :white_check_mark: |
+| < v0.1.0   | :x:                |
+
+## Reporting a Vulnerability
+
+We place great importance on the security of GreptimeDB code, software, 
+and cloud platform. If you come across a security vulnerability in GreptimeDB, 
+we kindly request that you inform us immediately. We will thoroughly investigate
+all valid reports and make every effort to resolve the issue promptly.
+
+To report any issues or vulnerabilities, please email us at info@greptime.com, rather than 
+posting publicly on GitHub. Be sure to provide us with the version identifier as well as details
+on how the vulnerability can be exploited.
--- a/benchmarks/Cargo.toml
+++ b/benchmarks/Cargo.toml
@@ -9,6 +9,6 @@ arrow.workspace = true
 clap = { version = "4.0", features = ["derive"] }
 client = { path = "../src/client" }
 indicatif = "0.17.1"
-itertools = "0.10.5"
+itertools.workspace = true
 parquet.workspace = true
 tokio.workspace = true
--- a/benchmarks/src/bin/nyc-taxi.rs
+++ b/benchmarks/src/bin/nyc-taxi.rs
@@ -21,12 +21,14 @@ use std::collections::HashMap;
 use std::path::{Path, PathBuf};
 use std::time::Instant;

-use arrow::array::{ArrayRef, PrimitiveArray, StringArray, TimestampNanosecondArray};
+use arrow::array::{ArrayRef, PrimitiveArray, StringArray, TimestampMicrosecondArray};
 use arrow::datatypes::{DataType, Float64Type, Int64Type};
 use arrow::record_batch::RecordBatch;
 use clap::Parser;
 use client::api::v1::column::Values;
-use client::api::v1::{Column, ColumnDataType, ColumnDef, CreateTableExpr, InsertRequest, TableId};
+use client::api::v1::{
+    Column, ColumnDataType, ColumnDef, CreateTableExpr, InsertRequest, InsertRequests,
+};
 use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
 use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
@@ -61,7 +63,7 @@ struct Args {
    #[arg(long = "skip-read")]
    skip_read: bool,

-    #[arg(short, long, default_value_t = String::from("127.0.0.1:3001"))]
+    #[arg(short, long, default_value_t = String::from("127.0.0.1:4001"))]
    endpoint: String,
 }

@@ -97,6 +99,9 @@ async fn write_data(

    for record_batch in record_batch_reader {
        let record_batch = record_batch.unwrap();
+        if !is_record_batch_full(&record_batch) {
+            continue;
+        }
        let (columns, row_count) = convert_record_batch(record_batch);
        let request = InsertRequest {
            table_name: TABLE_NAME.to_string(),
@@ -104,8 +109,12 @@ async fn write_data(
            columns,
            row_count,
        };
+        let requests = InsertRequests {
+            inserts: vec![request],
+        };
+
        let now = Instant::now();
-        db.insert(request).await.unwrap();
+        let _ = db.insert(requests).await.unwrap();
        let elapsed = now.elapsed();
        total_rpc_elapsed_ms += elapsed.as_millis();
        progress_bar.inc(row_count as _);
@@ -122,11 +131,17 @@ fn convert_record_batch(record_batch: RecordBatch) -> (Vec<Column>, u32) {
    let mut columns = vec![];

    for (array, field) in record_batch.columns().iter().zip(fields.iter()) {
-        let values = build_values(array);
+        let (values, datatype) = build_values(array);
+
        let column = Column {
-            column_name: field.name().to_owned(),
+            column_name: field.name().clone(),
            values: Some(values),
-            null_mask: vec![],
+            null_mask: array
+                .to_data()
+                .nulls()
+                .map(|bitmap| bitmap.buffer().as_slice().to_vec())
+                .unwrap_or_default(),
+            datatype: datatype.into(),
            // datatype and semantic_type are set to default
            ..Default::default()
        };
@@ -136,7 +151,7 @@ fn convert_record_batch(record_batch: RecordBatch) -> (Vec<Column>, u32) {
    (columns, row_count as _)
 }

-fn build_values(column: &ArrayRef) -> Values {
+fn build_values(column: &ArrayRef) -> (Values, ColumnDataType) {
    match column.data_type() {
        DataType::Int64 => {
            let array = column
@@ -144,10 +159,13 @@ fn build_values(column: &ArrayRef) -> Values {
                .downcast_ref::<PrimitiveArray<Int64Type>>()
                .unwrap();
            let values = array.values();
-            Values {
-                i64_values: values.to_vec(),
-                ..Default::default()
-            }
+            (
+                Values {
+                    i64_values: values.to_vec(),
+                    ..Default::default()
+                },
+                ColumnDataType::Int64,
+            )
        }
        DataType::Float64 => {
            let array = column
@@ -155,29 +173,38 @@ fn build_values(column: &ArrayRef) -> Values {
                .downcast_ref::<PrimitiveArray<Float64Type>>()
                .unwrap();
            let values = array.values();
-            Values {
-                f64_values: values.to_vec(),
-                ..Default::default()
-            }
+            (
+                Values {
+                    f64_values: values.to_vec(),
+                    ..Default::default()
+                },
+                ColumnDataType::Float64,
+            )
        }
        DataType::Timestamp(_, _) => {
            let array = column
                .as_any()
-                .downcast_ref::<TimestampNanosecondArray>()
+                .downcast_ref::<TimestampMicrosecondArray>()
                .unwrap();
            let values = array.values();
-            Values {
-                i64_values: values.to_vec(),
-                ..Default::default()
-            }
+            (
+                Values {
+                    ts_microsecond_values: values.to_vec(),
+                    ..Default::default()
+                },
+                ColumnDataType::TimestampMicrosecond,
+            )
        }
        DataType::Utf8 => {
            let array = column.as_any().downcast_ref::<StringArray>().unwrap();
            let values = array.iter().filter_map(|s| s.map(String::from)).collect();
-            Values {
-                string_values: values,
-                ..Default::default()
-            }
+            (
+                Values {
+                    string_values: values,
+                    ..Default::default()
+                },
+                ColumnDataType::String,
+            )
        }
        DataType::Null
        | DataType::Boolean
@@ -204,7 +231,7 @@ fn build_values(column: &ArrayRef) -> Values {
        | DataType::FixedSizeList(_, _)
        | DataType::LargeList(_)
        | DataType::Struct(_)
-        | DataType::Union(_, _, _)
+        | DataType::Union(_, _)
        | DataType::Dictionary(_, _)
        | DataType::Decimal128(_, _)
        | DataType::Decimal256(_, _)
@@ -213,6 +240,10 @@ fn build_values(column: &ArrayRef) -> Values {
    }
 }

+fn is_record_batch_full(batch: &RecordBatch) -> bool {
+    batch.columns().iter().all(|col| col.null_count() == 0)
+}
+
 fn create_table_expr() -> CreateTableExpr {
    CreateTableExpr {
        catalog_name: CATALOG_NAME.to_string(),
@@ -228,13 +259,13 @@ fn create_table_expr() -> CreateTableExpr {
            },
            ColumnDef {
                name: "tpep_pickup_datetime".to_string(),
-                datatype: ColumnDataType::Int64 as i32,
+                datatype: ColumnDataType::TimestampMicrosecond as i32,
                is_nullable: true,
                default_constraint: vec![],
            },
            ColumnDef {
                name: "tpep_dropoff_datetime".to_string(),
-                datatype: ColumnDataType::Int64 as i32,
+                datatype: ColumnDataType::TimestampMicrosecond as i32,
                is_nullable: true,
                default_constraint: vec![],
            },
@@ -339,25 +370,23 @@ fn create_table_expr() -> CreateTableExpr {
        primary_keys: vec!["VendorID".to_string()],
        create_if_not_exists: false,
        table_options: Default::default(),
-        region_ids: vec![0],
-        table_id: Some(TableId { id: 0 }),
+        region_numbers: vec![0],
+        table_id: None,
+        engine: "mito".to_string(),
    }
 }

 fn query_set() -> HashMap<String, String> {
-    let mut ret = HashMap::new();
-
-    ret.insert(
-        "count_all".to_string(),
-        format!("SELECT COUNT(*) FROM {TABLE_NAME};"),
-    );
-
-    ret.insert(
-        "fare_amt_by_passenger".to_string(),
-        format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {TABLE_NAME} GROUP BY passenger_count")
-    );
-
-    ret
+    HashMap::from([
+        (
+            "count_all".to_string(), 
+            format!("SELECT COUNT(*) FROM {TABLE_NAME};"),
+        ),
+        (
+            "fare_amt_by_passenger".to_string(),
+            format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {TABLE_NAME} GROUP BY passenger_count"),
+        )
+    ])
 }

 async fn do_write(args: &Args, db: &Database) {
@@ -382,7 +411,8 @@ async fn do_write(args: &Args, db: &Database) {
            let db = db.clone();
            let mpb = multi_progress_bar.clone();
            let pb_style = progress_bar_style.clone();
-            write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
+            let _ = write_jobs
+                .spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
        }
    }
    while write_jobs.join_next().await.is_some() {
@@ -391,7 +421,8 @@ async fn do_write(args: &Args, db: &Database) {
            let db = db.clone();
            let mpb = multi_progress_bar.clone();
            let pb_style = progress_bar_style.clone();
-            write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
+            let _ = write_jobs
+                .spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
        }
    }
 }
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -11,6 +11,10 @@ rpc_hostname = "127.0.0.1"
 # The number of gRPC server worker threads, 8 by default.
 rpc_runtime_size = 8

+[heartbeat]
+# Interval for sending heartbeat messages to the Metasrv in milliseconds, 5000 by default.
+interval_millis = 5000
+
 # Metasrv client options.
 [meta_client_options]
 # Metasrv address list.
@@ -24,9 +28,10 @@ tcp_nodelay = true

 # WAL options, see `standalone.example.toml`.
 [wal]
-dir = "/tmp/greptimedb/wal"
-file_size = "1GB"
-purge_threshold = "50GB"
+# WAL data directory
+# dir = "/tmp/greptimedb/wal"
+file_size = "256MB"
+purge_threshold = "4GB"
 purge_interval = "10m"
 read_batch_size = 128
 sync_write = false
@@ -34,15 +39,47 @@ sync_write = false
 # Storage options, see `standalone.example.toml`.
 [storage]
 type = "File"
-data_dir = "/tmp/greptimedb/data/"
+data_home = "/tmp/greptimedb/"
+# TTL for all tables. Disabled by default.
+# global_ttl = "7d"

 # Compaction options, see `standalone.example.toml`.
-[compaction]
+[storage.compaction]
 max_inflight_tasks = 4
 max_files_in_level0 = 8
 max_purge_tasks = 32

+# Storage manifest options
+[storage.manifest]
+# Region checkpoint actions margin.
+# Create a checkpoint every <checkpoint_margin> actions.
+checkpoint_margin = 10
+# Region manifest logs and checkpoints gc execution duration
+gc_duration = '10m'
+# Whether to try creating a manifest checkpoint on region opening
+checkpoint_on_startup = false
+
+# Storage flush options
+[storage.flush]
+# Max inflight flush tasks.
+max_flush_tasks = 8
+# Default write buffer size for a region.
+region_write_buffer_size = "32MB"
+# Interval to check whether a region needs flush.
+picker_schedule_interval = "5m"
+# Interval to auto flush a region if it has not flushed yet.
+auto_flush_interval = "1h"
+# Global write buffer size for all regions.
+global_write_buffer_size = "1GB"
+
 # Procedure storage options, see `standalone.example.toml`.
-# [procedure.store]
-# type = 'File'
-# data_dir = '/tmp/greptimedb/procedure/'
+[procedure]
+max_retry_times = 3
+retry_delay = "500ms"
+
+# Log options
+# [logging]
+# Specify logs directory.
+# dir = "/tmp/greptimedb/logs"
+# Specify the log level [info | debug | error | warn]
+# level = "info"
--- a/config/frontend.example.toml
+++ b/config/frontend.example.toml
@@ -1,10 +1,17 @@
 # Node running mode, see `standalone.example.toml`.
 mode = "distributed"

+[heartbeat]
+# Interval for sending heartbeat task to the Metasrv in milliseconds, 5000 by default.
+interval_millis = 5000
+# Interval for retry sending heartbeat task in milliseconds, 5000 by default.
+retry_interval_millis = 5000
+
 # HTTP server options, see `standalone.example.toml`.
 [http_options]
 addr = "127.0.0.1:4000"
 timeout = "30s"
+body_limit = "64MB"

 # gRPC server options, see `standalone.example.toml`.
 [grpc_options]
@@ -42,17 +49,32 @@ runtime_size = 2
 [influxdb_options]
 enable = true

-# Prometheus protocol options, see `standalone.example.toml`.
-[prometheus_options]
+# Prometheus remote storage options, see `standalone.example.toml`.
+[prom_store_options]
 enable = true

 # Prometheus protocol options, see `standalone.example.toml`.
-[prom_options]
+[prometheus_options]
 addr = "127.0.0.1:4004"

 # Metasrv client options, see `datanode.example.toml`.
 [meta_client_options]
 metasrv_addrs = ["127.0.0.1:3002"]
 timeout_millis = 3000
+# Timeout for DDL operations, in milliseconds.
+ddl_timeout_millis = 10000
 connect_timeout_millis = 5000
 tcp_nodelay = true
+
+# Log options, see `standalone.example.toml`
+# [logging]
+# dir = "/tmp/greptimedb/logs"
+# level = "info"
+
+# Datanode options.
+[datanode]
+# Datanode client options.
+[datanode.client]
+timeout = "10s"
+connect_timeout = "10s"
+tcp_nodelay = true
--- a/config/metasrv.example.toml
+++ b/config/metasrv.example.toml
@@ -13,3 +13,23 @@ datanode_lease_secs = 15
 selector = "LeaseBased"
 # Store data in memory, false by default.
 use_memory_store = false
+
+# Log options, see `standalone.example.toml`
+# [logging]
+# dir = "/tmp/greptimedb/logs"
+# level = "info"
+
+# Procedure storage options.
+[procedure]
+# Procedure max retry times.
+max_retry_times = 3
+# Initial retry delay of procedures, increases exponentially
+retry_delay = "500ms"
+
+# # Datanode options.
+# [datanode]
+# # Datanode client options.
+# [datanode.client_options]
+# timeout_millis = 10000
+# connect_timeout_millis = 10000
+# tcp_nodelay = true
--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -9,6 +9,9 @@ enable_memory_catalog = false
 addr = "127.0.0.1:4000"
 # HTTP request timeout, 30s by default.
 timeout = "30s"
+# HTTP request body limit, 64MB by default.
+# the following units are supported: B, KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, PiB
+body_limit = "64MB"

 # gRPC server options.
 [grpc_options]
@@ -66,24 +69,24 @@ runtime_size = 2
 # Whether to enable InfluxDB protocol in HTTP API, true by default.
 enable = true

-# Prometheus protocol options.
-[prometheus_options]
+# Prometheus remote storage options
+[prom_store_options]
 # Whether to enable Prometheus remote write and read in HTTP API, true by default.
 enable = true

-# Prom protocol options.
-[prom_options]
+# Prometheus protocol options
+[prometheus_options]
 # Prometheus API server address, "127.0.0.1:4004" by default.
 addr = "127.0.0.1:4004"

 # WAL options.
 [wal]
-# WAL data directory.
-dir = "/tmp/greptimedb/wal"
+# WAL data directory
+# dir = "/tmp/greptimedb/wal"
 # WAL file size in bytes.
-file_size = "1GB"
-# WAL purge threshold in bytes.
-purge_threshold = "50GB"
+file_size = "256MB"
+# WAL purge threshold.
+purge_threshold = "4GB"
 # WAL purge interval in seconds.
 purge_interval = "10m"
 # WAL read batch size.
@@ -96,10 +99,12 @@ sync_write = false
 # Storage type.
 type = "File"
 # Data directory, "/tmp/greptimedb/data" by default.
-data_dir = "/tmp/greptimedb/data/"
+data_home = "/tmp/greptimedb/"
+# TTL for all tables. Disabled by default.
+# global_ttl = "7d"

 # Compaction options.
-[compaction]
+[storage.compaction]
 # Max task number that can concurrently run.
 max_inflight_tasks = 4
 # Max files in level 0 to trigger compaction.
@@ -107,10 +112,39 @@ max_files_in_level0 = 8
 # Max task number for SST purge task after compaction.
 max_purge_tasks = 32

+# Storage manifest options
+[storage.manifest]
+# Region checkpoint actions margin.
+# Create a checkpoint every <checkpoint_margin> actions.
+checkpoint_margin = 10
+# Region manifest logs and checkpoints gc execution duration
+gc_duration = '10m'
+# Whether to try creating a manifest checkpoint on region opening
+checkpoint_on_startup = false
+
+# Storage flush options
+[storage.flush]
+# Max inflight flush tasks.
+max_flush_tasks = 8
+# Default write buffer size for a region.
+region_write_buffer_size = "32MB"
+# Interval to check whether a region needs flush.
+picker_schedule_interval = "5m"
+# Interval to auto flush a region if it has not flushed yet.
+auto_flush_interval = "1h"
+# Global write buffer size for all regions.
+global_write_buffer_size = "1GB"
+
 # Procedure storage options.
-# Uncomment to enable.
-# [procedure.store]
-# # Storage type.
-# type = "File"
-# # Procedure data path.
-# data_dir = "/tmp/greptimedb/procedure/"
+[procedure]
+# Procedure max retry times.
+max_retry_times = 3
+# Initial retry delay of procedures, increases exponentially
+retry_delay = "500ms"
+
+# Log options
+# [logging]
+# Specify logs directory.
+# dir = "/tmp/greptimedb/logs"
+# Specify the log level [info | debug | error | warn]
+# level = "info"
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,36 +0,0 @@
-FROM ubuntu:22.04 as builder
-
-ENV LANG en_US.utf8
-WORKDIR /greptimedb
-
-# Install dependencies.
-RUN apt-get update && apt-get install -y \
-    libssl-dev \
-    protobuf-compiler \
-    curl \
-    build-essential \
-    pkg-config \
-    python3 \
-    python3-dev \
-    && pip install pyarrow
-
-# Install Rust.
-SHELL ["/bin/bash", "-c"]
-RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
-ENV PATH /root/.cargo/bin/:$PATH
-
-# Build the project in release mode.
-COPY . .
-RUN cargo build --release
-
-# Export the binary to the clean image.
-# TODO(zyy17): Maybe should use the more secure container image.
-FROM ubuntu:22.04 as base
-
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install ca-certificates
-
-WORKDIR /greptime
-COPY --from=builder /greptimedb/target/release/greptime /greptime/bin/
-ENV PATH /greptime/bin/:$PATH
-
-ENTRYPOINT ["greptime"]
--- a/docker/aarch64/Dockerfile
+++ b/docker/aarch64/Dockerfile
@@ -1,50 +0,0 @@
-FROM ubuntu:22.04 as builder
-
-ENV LANG en_US.utf8
-WORKDIR /greptimedb
-
-# Install dependencies.
-RUN apt-get update && apt-get install -y \
-    libssl-dev \
-    protobuf-compiler \
-    curl \
-    build-essential \
-    pkg-config \
-    wget
-
-# Install Rust.
-SHELL ["/bin/bash", "-c"]
-RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
-ENV PATH /root/.cargo/bin/:$PATH
-
-# Install cross platform toolchain
-RUN apt-get -y update && \
-    apt-get -y install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu && \
-    apt-get install binutils-aarch64-linux-gnu
-
-COPY . .
-# This three env var is set in script, so I set it manually in dockerfile.
-ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/
-ENV LIBRARY_PATH=$LIBRARY_PATH:/usr/local/lib/
-ENV PY_INSTALL_PATH=${PWD}/python_arm64_build
-RUN chmod +x ./docker/aarch64/compile-python.sh && \
-    ./docker/aarch64/compile-python.sh
-# Install rustup target for cross compiling.
-RUN rustup target add aarch64-unknown-linux-gnu
-# Set the environment variable for cross compiling and compile it
-# Build the project in release mode. Set Net fetch with git cli to true to avoid git error.
-RUN export PYO3_CROSS_LIB_DIR=$PY_INSTALL_PATH/lib && \ 
-    alias python=python3 && \
-    CARGO_NET_GIT_FETCH_WITH_CLI=1 && \
-    cargo build --target aarch64-unknown-linux-gnu --release -F pyo3_backend
-
-# Exporting the binary to the clean image
-FROM ubuntu:22.04 as base
-
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install ca-certificates
-
-WORKDIR /greptime
-COPY --from=builder /greptimedb/target/aarch64-unknown-linux-gnu/release/greptime /greptime/bin/
-ENV PATH /greptime/bin/:$PATH
-
-ENTRYPOINT ["greptime"]
--- a/docker/aarch64/compile-python.sh
+++ b/docker/aarch64/compile-python.sh
@@ -1,46 +0,0 @@
-# this script will download Python source code, compile it, and install it to /usr/local/lib
-# then use this python to compile cross-compiled python for aarch64
-
-wget https://www.python.org/ftp/python/3.10.10/Python-3.10.10.tgz
-tar -xvf Python-3.10.10.tgz
-cd Python-3.10.10
-# explain Python compile options here a bit:s
-# --enable-shared: enable building a shared Python library (default is no) but we do need it for calling from rust
-# CC, CXX, AR, LD, RANLIB: set the compiler, archiver, linker, and ranlib programs to use
-# build: the machine you are building on, host: the machine you will run the compiled program on
-# --with-system-ffi: build _ctypes module using an installed ffi library, see Doc/library/ctypes.rst, not used in here TODO: could remove
-# ac_cv_pthread_is_default=no ac_cv_pthread=yes ac_cv_cxx_thread=yes:
-# allow cross-compiled python to have -pthread set for CXX, see https://github.com/python/cpython/pull/22525
-# ac_cv_have_long_long_format=yes: target platform supports long long type
-# disable-ipv6: disable ipv6 support, we don't need it in here
-# ac_cv_file__dev_ptmx=no ac_cv_file__dev_ptc=no: disable pty support, we don't need it in here
-
-# Build local python first, then build cross-compiled python.
-./configure \
--enable-shared \
-ac_cv_pthread_is_default=no ac_cv_pthread=yes ac_cv_cxx_thread=yes \
-ac_cv_have_long_long_format=yes \
--disable-ipv6 ac_cv_file__dev_ptmx=no ac_cv_file__dev_ptc=no && \
-make
-make install
-cd ..
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/
-export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/lib/
-export PY_INSTALL_PATH=${PWD}/python_arm64_build
-cd Python-3.10.10 && \
-make clean && \
-make distclean && \
-alias python=python3 && \
-./configure --build=x86_64-linux-gnu --host=aarch64-linux-gnu \
--prefix=$PY_INSTALL_PATH --enable-optimizations \
-CC=aarch64-linux-gnu-gcc \
-CXX=aarch64-linux-gnu-g++ \
-AR=aarch64-linux-gnu-ar \
-LD=aarch64-linux-gnu-ld \
-RANLIB=aarch64-linux-gnu-ranlib \
--enable-shared \
-ac_cv_pthread_is_default=no ac_cv_pthread=yes ac_cv_cxx_thread=yes \
-ac_cv_have_long_long_format=yes \
--disable-ipv6 ac_cv_file__dev_ptmx=no ac_cv_file__dev_ptc=no && \
-make && make altinstall && \
-cd ..
--- a/docker/centos/Dockerfile
+++ b/docker/centos/Dockerfile
@@ -0,0 +1,51 @@
+FROM centos:7 as builder
+
+ARG CARGO_PROFILE
+ARG FEATURES
+
+ENV LANG en_US.utf8
+WORKDIR /greptimedb
+
+# Install dependencies
+RUN ulimit -n 1024000 && yum groupinstall -y 'Development Tools'
+RUN yum install -y epel-release  \
+    openssl \
+    openssl-devel  \
+    centos-release-scl  \
+    rh-python38  \
+    rh-python38-python-devel
+
+# Install protoc
+RUN curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip
+RUN unzip protoc-3.15.8-linux-x86_64.zip -d /usr/local/
+
+# Install Rust
+SHELL ["/bin/bash", "-c"]
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
+ENV PATH /opt/rh/rh-python38/root/usr/bin:/usr/local/bin:/root/.cargo/bin/:$PATH
+
+# Build the project in release mode.
+RUN --mount=target=.,rw \
+    --mount=type=cache,target=/root/.cargo/registry \
+    make build \
+    CARGO_PROFILE=${CARGO_PROFILE} \
+    FEATURES=${FEATURES} \
+    TARGET_DIR=/out/target
+
+# Export the binary to the clean image.
+FROM centos:7 as base
+
+ARG CARGO_PROFILE
+
+RUN yum install -y epel-release \
+    openssl \
+    openssl-devel  \
+    centos-release-scl  \
+    rh-python38  \
+    rh-python38-python-devel
+
+WORKDIR /greptime
+COPY --from=builder /out/target/${CARGO_PROFILE}/greptime /greptime/bin/
+ENV PATH /greptime/bin/:$PATH
+
+ENTRYPOINT ["greptime"]
--- a/docker/ci/Dockerfile
+++ b/docker/ci/Dockerfile
@@ -1,6 +1,15 @@
 FROM ubuntu:22.04

-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install ca-certificates
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    ca-certificates \
+    python3.10 \
+    python3.10-dev \
+    python3-pip \
+    curl
+
+COPY ./docker/python/requirements.txt /etc/greptime/requirements.txt
+
+RUN python3 -m pip install -r /etc/greptime/requirements.txt

 ARG TARGETARCH

--- a/docker/ci/Dockerfile-centos
+++ b/docker/ci/Dockerfile-centos
@@ -0,0 +1,16 @@
+FROM centos:7
+
+RUN yum install -y epel-release \
+    openssl \
+    openssl-devel  \
+    centos-release-scl  \
+    rh-python38  \
+    rh-python38-python-devel
+
+ARG TARGETARCH
+
+ADD $TARGETARCH/greptime /greptime/bin/
+
+ENV PATH /greptime/bin/:$PATH
+
+ENTRYPOINT ["greptime"]
--- a/docker/dev-builder/Dockerfile
+++ b/docker/dev-builder/Dockerfile
@@ -0,0 +1,36 @@
+FROM ubuntu:22.04
+
+ENV LANG en_US.utf8
+WORKDIR /greptimedb
+
+# Install dependencies.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    libssl-dev \
+    tzdata \
+    protobuf-compiler \
+    curl \
+    ca-certificates \
+    git \
+    build-essential \
+    pkg-config \
+    python3.10 \
+    python3.10-dev \
+    python3-pip
+
+RUN git config --global --add safe.directory /greptimedb
+
+# Install Python dependencies.
+COPY ./docker/python/requirements.txt /etc/greptime/requirements.txt
+RUN python3 -m pip install -r /etc/greptime/requirements.txt
+
+# Install Rust.
+SHELL ["/bin/bash", "-c"]
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
+ENV PATH /root/.cargo/bin/:$PATH
+
+# Install Rust toolchains.
+ARG RUST_TOOLCHAIN
+RUN rustup toolchain install ${RUST_TOOLCHAIN}
+
+# Install nextest.
+RUN cargo install cargo-nextest --locked
--- a/docker/python/requirements.txt
+++ b/docker/python/requirements.txt
@@ -0,0 +1,5 @@
+numpy>=1.24.2
+pandas>=1.5.3
+pyarrow>=11.0.0
+requests>=2.28.2
+scipy>=1.10.1
--- a/docker/ubuntu/Dockerfile
+++ b/docker/ubuntu/Dockerfile
@@ -0,0 +1,56 @@
+FROM ubuntu:22.04 as builder
+
+ARG CARGO_PROFILE
+ARG FEATURES
+
+ENV LANG en_US.utf8
+WORKDIR /greptimedb
+
+# Install dependencies.
+RUN --mount=type=cache,target=/var/cache/apt \
+    apt-get update && apt-get install -y \
+    libssl-dev \
+    protobuf-compiler \
+    curl \
+    git \
+    build-essential \
+    pkg-config \
+    python3.10 \
+    python3.10-dev \
+    python3-pip
+
+# Install Rust.
+SHELL ["/bin/bash", "-c"]
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
+ENV PATH /root/.cargo/bin/:$PATH
+
+# Build the project in release mode.
+RUN --mount=target=.,rw \
+    --mount=type=cache,target=/root/.cargo/registry \
+    make build \
+    CARGO_PROFILE=${CARGO_PROFILE} \
+    FEATURES=${FEATURES} \
+    TARGET_DIR=/out/target
+
+# Export the binary to the clean image.
+# TODO(zyy17): Maybe should use the more secure container image.
+FROM ubuntu:22.04 as base
+
+ARG CARGO_PROFILE
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get \
+    -y install ca-certificates \
+    python3.10 \
+    python3.10-dev \
+    python3-pip \
+    curl
+
+COPY ./docker/python/requirements.txt /etc/greptime/requirements.txt
+
+RUN python3 -m pip install -r /etc/greptime/requirements.txt
+
+WORKDIR /greptime
+COPY --from=builder /out/target/${CARGO_PROFILE}/greptime /greptime/bin/
+ENV PATH /greptime/bin/:$PATH
+
+ENTRYPOINT ["greptime"]
--- a/docs/benchmarks/tsbs/v0.3.2.md
+++ b/docs/benchmarks/tsbs/v0.3.2.md
@@ -0,0 +1,39 @@
+# TSBS benchmark - v0.3.2
+
+## Environment
+
+|     |     |
+| --- | --- |
+| CPU | AMD Ryzen 7 7735HS (8 core 3.2GHz) |
+| Memory | 32GB |
+| Disk | SOLIDIGM SSDPFKNU010TZ |
+| OS | Ubuntu 22.04.2 LTS |
+
+
+## Write performance
+
+| Write buffer size | Ingest rate (rows/s) |
+| --- | --- |
+| 512M | 139583.04 |
+| 32M | 279250.52 |
+
+
+## Query performance
+
+| Query type  | v0.3.2 write buffer 32M (ms) | v0.3.2 write buffer 512M (ms) | v0.3.1 write buffer 32M (ms) |
+| --- | --- | --- | --- |
+| cpu-max-all-1 | 921.12 | 241.23 | 553.63 |
+| cpu-max-all-8 | 2657.66 | 502.78 | 3308.41 |
+| double-groupby-1 | 28238.85 | 27367.42 | 52148.22 |
+| double-groupby-5 | 33094.65 | 32421.89 | 56762.37 |
+| double-groupby-all | 38565.89 | 38635.52 | 59596.80 |
+| groupby-orderby-limit | 23321.60 | 22423.55 | 53983.23 |
+| high-cpu-1 | 1167.04 | 254.15 | 832.41 |
+| high-cpu-all | 32814.08 | 29906.94 | 62853.12 |
+| lastpoint | 192045.05 | 153575.42 | NA   |
+| single-groupby-1-1-1 | 63.97 | 87.35 | 92.66 |
+| single-groupby-1-1-12 | 666.24 | 326.98 | 781.50 |
+| single-groupby-1-8-1 | 225.29 | 137.97 | 281.95 |
+| single-groupby-5-1-1 | 70.40 | 81.64 | 86.15 |
+| single-groupby-5-1-12 | 722.75 | 356.01 | 805.18 |
+| single-groupby-5-8-1 | 285.60 | 115.88 | 326.29 |
--- a/docs/how-to/how-to-implement-sql-statement.md
+++ b/docs/how-to/how-to-implement-sql-statement.md
@@ -0,0 +1,74 @@
+This document introduces how to implement SQL statements in GreptimeDB.
+
+The execution entry point for SQL statements is located in the Frontend Instance. You can see it has
+implemented `SqlQueryHandler`:
+
+```rust
+impl SqlQueryHandler for Instance {
+    type Error = Error;
+
+    async fn do_query(&self, query: &str, query_ctx: QueryContextRef) -> Vec<Result<Output>> {
+        // ...
+    }
+}
+```
+
+Normally, when a SQL query arrives at GreptimeDB, the `do_query` method will be called. After some parsing work, the SQL
+will be fed into `StatementExecutor`:
+
+```rust
+// in Frontend Instance:
+self.statement_executor.execute_sql(stmt, query_ctx).await
+```
+
+That's where we handle our SQL statements. You can just create a new match arm for your statement there, then the
+statement is implemented for both GreptimeDB Standalone and Cluster. You can see how `DESCRIBE TABLE` is implemented as
+an example.
+
+Now, what if the statements should be handled differently for GreptimeDB Standalone and Cluster? You can see there's
+a `SqlStatementExecutor` field in `StatementExecutor`. Each GreptimeDB Standalone and Cluster has its own implementation
+of `SqlStatementExecutor`. If you are going to implement the statements differently in the two modes (
+like `CREATE TABLE`), you have to implement them in their own `SqlStatementExecutor`s.
+
+This is summarized in the diagram below:
+
+```text
+                             SQL query                            
+                                |                                
+                                v                                
+                  +---------------------------+                  
+                  | SqlQueryHandler::do_query |                  
+                  +---------------------------+                  
+                                |                                
+                                | SQL parsing                    
+                                v                                
+               +--------------------------------+                
+               | StatementExecutor::execute_sql |                
+               +--------------------------------+                
+                                |                                
+                                | SQL execution                    
+                                v                                
+               +----------------------------------+                
+               | commonly handled statements like |
+               | "plan_exec" for selection or     |
+               +----------------------------------+                
+                       |                |                        
+        For Standalone |                | For Cluster          
+                       v                v                        
++---------------------------+      +---------------------------+ 
+| SqlStatementExecutor impl |      | SqlStatementExecutor impl | 
+| in Datanode Instance      |      | in Frontend DistInstance  | 
++---------------------------+      +---------------------------+ 
+```
+
+Note that some SQL statements can be executed in our QueryEngine, in the form of `LogicalPlan`. You can follow the
+invocation path down to the `QueryEngine` implementation from `StatementExecutor::plan_exec`. For now, there's only
+one `DatafusionQueryEngine` for both GreptimeDB Standalone and Cluster. That lone query engine works for both modes
+because GreptimeDB reads and writes data through the `Table` trait, and each mode has its own `Table` implementation.
+
+We don't have any bias towards whether statements should be handled in query engine or `StatementExecutor`. You can
+implement one kind of statement in both places. For example, `Insert` with selection is handled in query engine, because
+we can easily do the query part there. However, `Insert` without selection is not, for the cost of parsing statement
+to `LogicalPlan` is not negligible. So generally if the SQL query is simple enough, you can handle it
+in `StatementExecutor`; otherwise if it is complex or has some part of selection, it should be parsed to `LogicalPlan`
+and handled in query engine.  
--- a/docs/rfcs/2023-03-08-region-fault-tolerance.md
+++ b/docs/rfcs/2023-03-08-region-fault-tolerance.md
@@ -0,0 +1,196 @@
+---
+Feature Name: "Fault Tolerance for Region"
+Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/1126
+Date: 2023-03-08
+Author: "Luo Fucong <luofucong@greptime.com>"
+---
+
+Fault Tolerance for Region
+----------------------
+
+# Summary
+
+This RFC proposes a method to achieve fault tolerance for regions in GreptimeDB's distributed mode. Or, put it in another way, achieving region high availability("HA") for GreptimeDB cluster. 
+
+In this RFC, we mainly describe two aspects of region HA: how region availability is detected, and what recovery process needs to be taken. We also discuss some alternatives and future work.
+
+When this feature is done, our users can expect a GreptimeDB cluster that can always handle their requests to regions, although some requests may fail during the region failover. The optimization to reduce the MTTR (Mean Time To Recovery) is not a concern of this RFC, and is left for future work.
+
+# Motivation
+
+Fault tolerance for regions is a critical feature for our clients to use the GreptimeDB cluster confidently. High availability for users to interact with their stored data is a "must have" for any TSDB product, and that includes our GreptimeDB cluster.
+
+# Details
+
+## Background
+
+Some backgrounds about region in distributed mode:
+
+- A table is logically split into multiple regions. Each region stores a part of non-overlapping table data.
+- Regions are distributed among Datanodes; the mappings are not static, and are assigned and governed by Metasrv.
+- In distributed mode, client requests are scoped in regions. To be more specific, when a request that needs to scan multiple regions arrived in Frontend, Frontend splits the request into multiple sub-requests, each of which scans one region only, and submits them to Datanodes that hold corresponding regions. 
+
+In conclusion, as long as regions remain available, and regions could regain availability when failures do occur, the overall region HA could be achieved. With this in mind, let's see how region failures are detected first. 
+
+## Failure Detection
+
+We detect region failures in Metasrv, and do it both passively and actively. Passively means that Metasrv does not fire "are you healthy" requests to regions. Instead, we carry region health information in the heartbeat requests that are submitted to Metasrv by Datanodes.
+
+Datanode already carries its regions stats in the heartbeat request (the non-relevant fields are omitted):
+
+```protobuf
+message HeartbeatRequest {
+  ...
+  // Region stats on this node
+  repeated RegionStat region_stats = 6;
+  ...
+}
+
+message RegionStat {
+  uint64 region_id = 1;
+  TableName table_name = 2;
+  ...
+}
+```
+
+For the sake of simplicity, we don't add another field `bool available = 3` to the `RegionStat` message; instead, if the region is unavailable in the view of the Datanode that contains it, the Datanode simply does not include its `RegionStat` in the heartbeat request. Or, if the Datanode itself is unavailable, the heartbeat request is not submitted at all, which is effectively the same as not carrying the `RegionStat`.
+
+> The heartbeat interval is now hardcoded to five seconds.
+
+Metasrv gathers the heartbeat requests, extracts the `RegionStat`s, and treats them as region heartbeats. In this way, Metasrv maintains the health information of all regions. If a region's heartbeats are not received for a period of time, Metasrv suspects the region might be unavailable. To decide whether a region has failed or not, Metasrv uses a failure detection algorithm called the "[Phi φ Accrual Failure Detection](https://medium.com/@arpitbhayani/phi-%CF%86-accrual-failure-detection-79c21ce53a7a)". Basically, the algorithm calculates a value called "phi" to represent the possibility of a region's unavailability, based on the arrival rate of historical heartbeats. Once the "phi" is above some pre-defined threshold, Metasrv knows the region has failed.
+
+> This algorithm has been widely adopted in some well known products, like Akka and Cassandra.
+
+When Metasrv decides from heartbeats that some region has failed, it's not the final decision. Here comes the "active" detection. Before Metasrv decides to do region failover, it actively invokes the health check interface of the Datanode on which the failed region resides. Only if this health check fails does Metasrv actually start doing failover upon the region.
+
+To conclude, the failure detection pseudo-codes are like this:
+
+```rust
+// in Metasrv:
+fn failure_detection() {
+    loop {
+        // passive detection
+        let failed_regions = all_regions.iter().filter(|r| r.estimated_failure_possibility() > config.phi).collect();
+
+        // find the datanodes that contains the failed regions
+        let datanodes_and_regions = find_region_resides_datanodes(failed_regions);
+
+        // active detection  
+        for (datanode, regions) in datanodes_and_regions {
+            if !datanode.is_healthy(regions) {
+                do_failover(datanode, regions);
+            }
+        }
+
+        sleep(config.detect_interval);
+    }
+}
+```
+
+Some design considerations:
+
+- Why do active detection when we already have passive detection? Because it can happen that the network is only connectable in one direction sometimes (especially in a complex Cloud environment): the Datanode's heartbeats cannot reach Metasrv, while Metasrv can still reach the Datanode. Active detection avoids this false positive situation.
+- Why the detection works on region instead of Datanode? Because we might face the possibility that only part of the regions in the Datanode are not available, not ALL regions. Especially the situation that Datanodes are used by multiple tenants. If this is the case, it's better to do failover upon the designated regions instead of the whole regions that reside on the Datanode. All in all, we want a more subtle control over region failover. 
+
+So we detect some regions are not available. How to regain the availability back?
+
+## Region Failover
+
+Region Failover largely relies on remote WAL, aka "[Bunshin](https://github.com/GreptimeTeam/bunshin)". I'm not including any of the details of it in this RFC, let's just assume we already have it.
+
+In general, region failover is fairly simple. Once Metasrv decides to do failover upon some regions, it first chooses one or more Datanodes to hold the failed region. This can be done easily, as the Metasrv already has the whole picture of Datanodes: it knows which Datanode has the minimum regions, what Datanode historically had the lowest CPU usage and IO rate, and how the Datanodes are assigned to tenants, among other information that can all help the Metasrv choose the most suitable Datanodes. Let's call these chosen Datanodes as "candidates".
+
+> The strategy to choose the most suitable candidates requires careful design, but it's another RFC.
+
+Then, Metasrv sets the states of these failed regions as "passive". We should add a field to `Region`:
+
+```protobuf
+message Region {
+  uint64 id = 1;
+  string name = 2;
+  Partition partition = 3;
+  
+  message State {
+    Active,
+    Passive,
+  }
+  State state = 4;
+  
+  map<string, string> attrs = 100;
+}
+```
+
+Here `Region` is used in message `RegionRoute`, which indicates how the write request is split among regions. When a region is set as "passive", Frontend knows the write to it should be rejected at the moment (the region read is not blocked, however).
+
+> Making a region "passive" here is effectively blocking the write to it. It's ok in the failover situation, the region is failed anyway. However, when dealing with active maintenance operations, region state requires more refined design. But that's another story. 
+
+Third, Metasrv fires the "close region" requests to the failed Datanodes, and fires the "open region" requests to those candidates. "Close region" requests might fail due to the unavailability of Datanodes, but that's fine; it's just a best-effort attempt to reduce the chance of any in-flight writes getting handled unintentionally after the region is set as "passive". The "open region" requests must succeed though. Datanodes open regions from remote WAL.
+
+> Currently the "close region" is undefined in Datanode. It could be a local cache clean up of region data or other resources tidy up.
+
+Finally, when a candidate successfully opens its region, it calls back to Metasrv, indicating it is ready to handle the region. The "call back" here is backed by its heartbeat to Metasrv. Metasrv updates the region's state to "active", so as to let Frontend lift the restrictions on region writes (again, the read part of region is untouched).
+
+All the above steps should be managed by remote procedure framework. It's another implementation challenge in the region failover feature. (One is the remote WAL of course.)
+
+A picture is worth a thousand words:
+
+```text
+                                    +-------------------------+                                        
+                                    | Metasrv detects region  |                                        
+                                    | failure                 |                                        
+                                    +-------------------------+                                        
+                                                 |                                                     
+                                                 v                                                     
+                                    +----------------------------+                                        
+                                    | Metasrv chooses candidates |                                        
+                                    | to hold failed regions     |                                        
+                                    +----------------------------+                                        
+                                                 |                                                     
+                                                 v                                                     
+                                    +-------------------------+       +-------------------------+      
+                                    | Metasrv "passive" the   |------>| Frontend rejects writes |      
+                                    | failed regions          |       | to "passive" regions    |      
+                                    +-------------------------+       +-------------------------+      
+                                                 |                                                     
+                                                 v                                                     
+--------------------------+        +---------------------------+                                        
+| Candidate Datanodes open |<-------| Metasrv fires "close" and |                                        
+| regions from remote WAL  |        | "open" region requests    |                                        
+--------------------------+        +---------------------------+                                        
+             |                                                                                         
+             |                                                                                         
+             |                      +-------------------------+       +-------------------------+      
+             +--------------------->| Metasrv "active" the    |------>| Frontend lifts write    |      
+                                    | failed regions          |       | restriction to regions  |      
+                                    +-------------------------+       +-------------------------+      
+                                                 |                                                     
+                                                 v                                                     
+                                    +-------------------------+                                        
+                                    | Region failover done,   |                                        
+                                    | HA regain               |                                        
+                                    +-------------------------+                                        
+```
+
+# Alternatives
+
+## The "Neon" Way
+
+Remote WAL raises a problem that could harm the write throughput of GreptimeDB cluster: each write request has to do at least two remote calls, one from Frontend to Datanode, and one from Datanode to remote WAL. What if we do it the "[Neon](https://github.com/neondatabase/neon)" way, making remote WAL sit in between the Frontend and Datanode; couldn't that improve our write throughput? It could, though there are some consistency issues like "read-your-writes" to solve.
+
+However, the main reasons we don't adopt this method are two-fold:
+
+1. Remote WAL is planned to be quorum based, it can be efficiently written;
+2. More importantly, we are planning to make the remote WAL an option that users could choose not to enable it (at the cost of some reliability reduction).
+
+## No WAL, Replication instead
+
+This method replicates regions across Datanodes directly, like the common way in shared-nothing databases. If the main region fails, a standby region in the replication group is elected as the new "main" and takes the read/write requests. The main concern with this method is its incompatibility with our current architecture and code structure. It requires a major redesign, but gains no significant advantage over the remote WAL method.
+
+However, the replication does have its own advantage that we can learn from to optimize this failover procedure.
+
+# Future Work
+
+Some optimizations we could take:
+
+- To reduce the MTTR, we could make Metasrv choose the candidate for each region at normal time. The candidate does some preparation work to reduce the open region time, effectively accelerating the failover procedure.
+- We can adopt the replication method, to the degree that region replicas are used as the fast catch-up candidates. The data difference among replicas is minor, so region failover does not need to load or exchange too much data, greatly reducing the region failover time.
--- a/docs/rfcs/2023-03-29-file-external-table/external-table-engine-overview.png
+++ b/docs/rfcs/2023-03-29-file-external-table/external-table-engine-overview.png
--- a/docs/rfcs/2023-03-29-file-external-table/external-table-engine-way-2.png
+++ b/docs/rfcs/2023-03-29-file-external-table/external-table-engine-way-2.png
--- a/docs/rfcs/2023-03-29-file-external-table/rfc.md
+++ b/docs/rfcs/2023-03-29-file-external-table/rfc.md
@@ -0,0 +1,174 @@
+---
+Feature Name: "File external table"
+Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/1041
+Date: 2023-03-08
+Author: "Xu Wenkang <wenymedia@gmail.com>"
+---
+
+File external table
+
+---
+
+# Summary
+
+Allows users to perform SQL queries on files
+
+# Motivation
+
+User data may already exist in other storage systems, e.g., file systems or S3, in formats such as CSV, Parquet, or JSON. We can provide users the ability to perform SQL queries on these files.
+
+
+# Details
+
+## Overview
+
+The file external table provides users the ability to perform SQL queries on these files.
+
+For example, a user has a CSV file on the local file system `/var/data/city.csv`:
+
+```
+Rank , Name , State , 2023 Population , 2020 Census , Annual Change , Density (mi²)
+1 , New York City , New York , 8,992,908 , 8,804,190 , 0.7% , 29,938
+2 , Los Angeles , California , 3,930,586 , 3,898,747 , 0.27% , 8,382
+3 , Chicago , Illinois , 2,761,625 , 2,746,388 , 0.18% , 12,146
+.....
+```
+
+Then user can create a file external table with:
+
+```sql
+CREATE EXTERNAL TABLE city with(location='/var/data/city.csv', format="CSV", field_delimiter = ',', record_delimiter = '\n', skip_header = 1);
+```
+
+Then query the external table with:
+
+```bash
+MySQL> select * from city;
+```
+
+| Rank | Name          | State      | 2023 Population | 2020 Census | Annual Change | Density (mi²) |
+| :--- | :------------ | :--------- | :-------------- | :---------- | :------------ | :------------ |
+| 1    | New York City | New York   | 8,992,908       | 8,804,190   | 0.7%          | 29,938        |
+| 2    | Los Angeles   | California | 3,930,586       | 3,898,747   | 0.27%         | 8,382         |
+| 3    | Chicago       | Illinois   | 2,761,625       | 2,746,388   | 0.18%         | 12,146        |
+
+Drop the external table, if needed, with:
+
+```sql
+DROP EXTERNAL TABLE city
+```
+
+
+### Syntax
+
+```
+CREATE EXTERNAL TABLE [<database>.]<table_name>
+[
+ (
+    <col_name> <col_type> [NULL | NOT NULL] [COMMENT "<comment>"]
+ )
+]
+[ WITH
+ (
+     LOCATION = 'url'
+   [,FIELD_DELIMITER =  'delimiter' ]
+   [,RECORD_DELIMITER =  'delimiter' ]
+   [,SKIP_HEADER =  '<number>' ]
+   [,FORMAT =  { csv | json | parquet } ]
+   [,PATTERN = '<regex_pattern>' ]
+   [,ENDPOINT = '<uri>' ]
+   [,ACCESS_KEY_ID = '<key_id>' ]
+   [,SECRET_ACCESS_KEY = '<access_key>' ]
+   [,SESSION_TOKEN = '<token>' ]
+   [,REGION = '<region>' ]
+   [,ENABLE_VIRTUAL_HOST_STYLE = '<boolean>']
+   ..
+ )
+]
+```
+
+### Supported File Format
+
+The external file table supports multiple formats; We divide formats into row format and columnar format.
+
+Row formats:
+
+- CSV, JSON
+
+Columnar formats:
+
+- Parquet
+
+Some of these formats support filter pushdown, and others don't. If users query very large files in a format that doesn't support pushdown, the query might consume a lot of IO scanning full files and run for a long time.
+
+### File Table Engine
+
+![overview](external-table-engine-overview.png)
+
+We implement a file table engine that creates an external table by accepting user-specified file paths and treating all records as immutable.
+
+1. File Format Decoder: decode files to the `RecordBatch` stream.
+2. File Table Engine: implement the `TableProvider` trait, store necessary metadata in memory, and provide scan ability.
+
+Our implementation is better suited for small files. For large files (e.g., a GB-level CSV file), we suggest that users import the data into the database.
+
+## Drawbacks
+
+- Some formats don't support filter pushdown
+- Hard to support indexing
+
+## Life cycle
+
+### Register a table
+
+1. Write metadata to manifest.
+2. Create the table via file table engine.
+3. Register table to `CatalogProvider` and register table to `SystemCatalog`(persist tables to disk).
+
+### Deregister a table (Drop a table)
+
+1. Fetch the target table info (figure out table engine type).
+2. Deregister the target table in `CatalogProvider` and `SystemCatalog`.
+3. Find the target table engine.
+4. Drop the target table.
+
+### Recover a table when restarting
+
+1. Collect tables name and engine type info.
+2. Find the target tables in different engines.
+3. Open and register tables.
+
+# Alternatives
+
+## Using DataFusion API
+
+We can use datafusion API to register a file table:
+
+```rust
+let ctx = SessionContext::new();
+
+ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
+
+// create a plan
+let df = ctx.sql("SELECT a, MIN(b) FROM example WHERE a <= b GROUP BY a LIMIT 100").await?;
+```
+
+### Drawbacks
+
+DataFusion implements its own `Object Store` abstraction and supports parsing partitioned directories, which can push down filters and skip some directories. However, this makes it impossible to use our `LruCacheLayer` (parsing the partitioned directories requires paths as input). If we want to manage memory entirely, we should implement our own `TableProvider` or `Table`.
+
+- Impossible to use `CacheLayer`
+
+## Introduce an intermediate representation layer
+
+![overview](external-table-engine-way-2.png)
+
+We convert all files into `parquet` as an intermediate representation. Then we only need to implement a `parquet` file table engine, and we already have a similar one. Also, it supports limited filter pushdown via the `parquet` row group stats.
+
+### Drawbacks
+
+- Computing overhead
+- Storage overhead
+
+
+
--- a/docs/rfcs/2023-05-09-distributed-planner.md
+++ b/docs/rfcs/2023-05-09-distributed-planner.md
@@ -0,0 +1,137 @@
+---
+Feature Name: distributed-planner
+Tracking Issue: TBD
+Date: 2023-05-09
+Author: "Ruihang Xia <waynestxia@gmail.com>"
+---
+
+Distributed Planner
+-------------------
+# Summary
+Enhance the logical planner to be aware of the distributed, multi-region table topology, in order to achieve "push computation down" execution rather than the current "pull data up" manner.
+
+# Motivation
+Distributed query execution can leverage the advantage of GreptimeDB's architecture to process large datasets that exceed the capacity of a single node, or accelerate query execution by running it in parallel. This task includes two sub-tasks:
+  - Be able to transform the plan that can push as much as possible computation down to data source.
+  - Be able to handle pipeline breaker (like `Join` or `Sort`) on multiple computation nodes.
+This is a relatively complex topic. To keep this RFC concentrated I'll focus on the first one.
+
+# Details
+## Background: Partition and Region
+GreptimeDB supports table partitioning, where the partition rule is set during table creation. Each partition can be further divided into one or more physical storage units known as "regions". Both partitions and regions are divided based on rows:
+``` text
+┌────────────────────────────────────┐
+│                                    │
+│               Table                │
+│                                    │
+└─────┬────────────┬────────────┬────┘
+      │            │            │
+      │            │            │
+┌─────▼────┐ ┌─────▼────┐ ┌─────▼────┐
+│ Region 1 │ │ Region 2 │ │ Region 3 │
+└──────────┘ └──────────┘ └──────────┘
+  Row 1~10     Row 11~20    Row 21~30
+```
+Generally speaking, region is the minimum element of data distribution, and we can also use it as the unit to distribute computation. This can greatly simplify the routing logic of this distributed planner, by always scheduling the computation to the node that currently opens the corresponding region. It is also easy to scale out more nodes for computing since GreptimeDB's data is persisted on shared storage backend like S3. But this is a bit beyond the scope of this specific topic.
+## Background: Commutativity
+Commutativity is an attribute that describes whether two operations can exchange their application order: $P1(P2(R)) \Leftrightarrow P2(P1(R))$. If the equation holds, we can transform one expression into another form without changing its result. This is useful for rewriting SQL expressions, and is the theoretical basis of this RFC.
+
+Take this SQL as an example
+
+``` sql
+SELECT a FROM t WHERE a > 10;
+```
+
+As we know projection and filter are commutative (todo: latex), it can be translated to the following two identical plan trees:
+
+```text
+┌─────────────┐       ┌─────────────┐
+│Projection(a)│       │Filter(a>10) │
+└──────▲──────┘       └──────▲──────┘
+       │                     │
+┌──────┴──────┐       ┌──────┴──────┐
+│Filter(a>10) │       │Projection(a)│
+└──────▲──────┘       └──────▲──────┘
+       │                     │
+┌──────┴──────┐       ┌──────┴──────┐
+│  TableScan  │       │  TableScan  │
+└─────────────┘       └─────────────┘
+```
+
+## Merge Operation
+
+This RFC proposes to add a new expression node `MergeScan` to merge results from several regions in the frontend. It wraps the abstraction of remote data and execution, and exposes a `TableScan` interface to the upper level.
+
+``` text
+        ▲
+        │
+┌───────┼───────┐
+│       │       │
+│    ┌──┴──┐    │
+│    └──▲──┘    │
+│       │       │
+│    ┌──┴──┐    │
+│    └──▲──┘    │    ┌─────────────────────────────┐
+│       │       │    │                             │
+│  ┌────┴────┐  │    │ ┌──────────┐ ┌───┐    ┌───┐ │
+│  │MergeScan◄──┼────┤ │ Region 1 │ │   │ .. │   │ │
+│  └─────────┘  │    │ └──────────┘ └───┘    └───┘ │
+│               │    │                             │
+└─Frontend──────┘    └─Remote-Sources──────────────┘
+```
+This merge operation simply chains all the underlying remote data sources and returns `RecordBatch`, just like a coalesce op. Each remote source is a gRPC query to a datanode via the substrait logical plan interface. The plan is transformed and divided from the original query that comes to the frontend.
+
+## Commutativity of MergeScan
+
+Obviously, the position of `MergeScan` is the key to the distributed plan. The closer it is to the underlying `TableScan`, the less computation is taken by datanodes. Thus the goal is to pull the `MergeScan` up as much as possible. "Pull up" means exchanging `MergeScan` with its parent node in the plan tree, which means we should check the commutativity between the existing expression nodes and the `MergeScan`. Here I classify all the possibilities into five categories:
+
+- Commutative: $P1(P2(R)) \Leftrightarrow P2(P1(R))$
+  - filter
+  - projection
+  - operations that match the partition key
+- Partial Commutative: $P1(P2(R)) \Leftrightarrow P1(P2(P1(R)))$
+  - $min(R) \rightarrow min(MERGE(min(R)))$
+  - $max(R) \rightarrow max(MERGE(max(R)))$
+- Conditional Commutative: $P1(P2(R)) \Leftrightarrow P3(P2(P1(R)))$
+  - $count(R) \rightarrow sum(MERGE(count(R)))$
+- Transformed Commutative: $P1(P2(R)) \Leftrightarrow P1(P3(R)) \Leftrightarrow P3(P1(R))$
+  - $avg(R) \rightarrow sum(R)/count(R)$
+- Non-commutative
+  - sort
+  - join
+  - percentile
+## Steps to plan
+After establishing the set of commutative relations for all expressions, we can begin transforming the logical plan. There are four steps:
+
+  - Add a merge node before table scan
+  - Evaluate commutativity in a bottom-up way, stop at the first non-commutative node
+  - Divide the TableScan to scan over partitions
+  - Execute
+
+First insert the `MergeScan` on top of the bottom `TableScan` node. Then examine the commutativity starting from the `MergeScan` node, and transform the plan tree based on the result. Stop this process at the first non-commutative node.
+``` text
+                  ┌─────────────┐   ┌─────────────┐
+                  │    Sort     │   │    Sort     │
+                  └──────▲──────┘   └──────▲──────┘
+                         │                 │
+┌─────────────┐   ┌──────┴──────┐   ┌──────┴──────┐
+│    Sort     │   │Projection(a)│   │  MergeScan  │
+└──────▲──────┘   └──────▲──────┘   └──────▲──────┘
+       │                 │                 │
+┌──────┴──────┐   ┌──────┴──────┐   ┌──────┴──────┐
+│Projection(a)│   │  MergeScan  │   │Projection(a)│
+└──────▲──────┘   └──────▲──────┘   └──────▲──────┘
+       │                 │                 │
+┌──────┴──────┐   ┌──────┴──────┐   ┌──────┴──────┐
+│  TableScan  │   │  TableScan  │   │  TableScan  │
+└─────────────┘   └─────────────┘   └─────────────┘
+      (a)               (b)               (c)
+```
+Then in the physical planning phase, convert the sub-tree below `MergeScan` into a remote query request and dispatch it to all the regions. The `MergeScan` then receives the results and feeds them to its parent node.
+
+To simplify the overall complexity, any error in the procedure will lead to a failure to the entire query, and cancel all other parts.
+# Alternatives
+## Spill
+If we only consider the ability to process large datasets, we can enable DataFusion's spill ability to temporarily persist intermediate data to disk, like "swap" memory. But this will lead to very slow performance and very large write amplification.
+# Future Work
+As described in the `Motivation` section, we can further explore the distributed planner on the physical execution level, by introducing mechanisms like Spark's shuffle to improve parallelism and reduce the stages of intermediate pipeline breakers.
--- a/docs/rfcs/2023-07-06-table-engine-refactor.md
+++ b/docs/rfcs/2023-07-06-table-engine-refactor.md
@@ -0,0 +1,303 @@
+---
+Feature Name: table-engine-refactor
+Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/1869
+Date: 2023-07-06
+Author: "Yingwen <realevenyag@gmail.com>"
+---
+
+Refactor Table Engine
+----------------------
+
+# Summary
+Refactor table engines to address several historical tech debts.
+
+# Motivation
+Both `Frontend` and `Datanode` have to deal with multiple regions in a table. This results in code duplication and additional burden to the `Datanode`.
+
+Before:
+
+
+```mermaid
+graph TB
+
+subgraph Frontend["Frontend"]
+    subgraph MyTable
+        A("region 0, 2 -> Datanode0")
+        B("region 1, 3 -> Datanode1")
+    end
+end
+
+MyTable --> MetaSrv
+MetaSrv --> ETCD
+
+MyTable-->TableEngine0
+MyTable-->TableEngine1
+
+subgraph Datanode0
+    Procedure0("procedure")
+    TableEngine0("table engine")
+    region0
+    region2
+    mytable0("my_table")
+
+    Procedure0-->mytable0
+    TableEngine0-->mytable0
+    mytable0-->region0
+    mytable0-->region2
+end
+
+
+subgraph Datanode1
+    Procedure1("procedure")
+    TableEngine1("table engine")
+    region1
+    region3
+    mytable1("my_table")
+
+    Procedure1-->mytable1
+    TableEngine1-->mytable1
+    mytable1-->region1
+    mytable1-->region3
+end
+
+
+subgraph manifest["table manifest"]
+    M0("my_table")
+    M1("regions: [0, 1, 2, 3]")
+end
+
+mytable1-->manifest
+mytable0-->manifest
+
+RegionManifest0("region manifest 0")
+RegionManifest1("region manifest 1")
+RegionManifest2("region manifest 2")
+RegionManifest3("region manifest 3")
+region0-->RegionManifest0
+region1-->RegionManifest1
+region2-->RegionManifest2
+region3-->RegionManifest3
+```
+
+`Datanodes` can update the same manifest file for a table as regions are assigned to different nodes in the cluster. We also have to run procedures on `Datanode` to ensure the table manifest is consistent with region manifests. "Table" in a `Datanode` is a subset of the table's regions. The `Datanode` is much closer to `RegionServer` in `HBase` which only deals with regions.
+
+In cluster mode, we store table metadata in etcd and table manifest. The table manifest becomes redundant. We can remove the table manifest if we refactor the table engines to region engines that only care about regions. What's more, we don't need to run those procedures on `Datanode`.
+
+After:
+```mermaid
+graph TB
+
+subgraph Frontend["Frontend"]
+    direction LR
+    subgraph MyTable
+        A("region 0, 2 -> Datanode0")
+        B("region 1, 3 -> Datanode1")
+    end
+end
+
+MyTable --> MetaSrv
+MetaSrv --> ETCD
+
+MyTable-->RegionEngine
+MyTable-->RegionEngine1
+
+subgraph Datanode0
+    RegionEngine("region engine")
+    region0
+    region2
+    RegionEngine-->region0
+    RegionEngine-->region2
+end
+
+
+subgraph Datanode1
+    RegionEngine1("region engine")
+    region1
+    region3
+    RegionEngine1-->region1
+    RegionEngine1-->region3
+end
+
+RegionManifest0("region manifest 0")
+RegionManifest1("region manifest 1")
+RegionManifest2("region manifest 2")
+RegionManifest3("region manifest 3")
+region0-->RegionManifest0
+region1-->RegionManifest1
+region2-->RegionManifest2
+region3-->RegionManifest3
+```
+This RFC proposes to refactor table engines into region engines as a first step to make the `Datanode` act like a `RegionServer`.
+
+
+# Details
+## Overview
+
+We plan to refactor the `TableEngine` trait into `RegionEngine` gradually. This RFC focuses on the `mito` engine as it is the default table engine and the most complicated engine.
+
+Currently, we build `MitoEngine` upon `StorageEngine`, which manages regions of the `mito` engine. Since `MitoEngine` becomes a region engine, we could combine `StorageEngine` with `MitoEngine` to simplify our code structure.
+
+The chart below shows the overall architecture of the `MitoEngine`.
+
+```mermaid
+classDiagram
+class MitoEngine~LogStore~ {
+    -WorkerGroup workers
+}
+class MitoRegion {
+    +VersionControlRef version_control
+    -RegionId region_id
+    -String manifest_dir
+    -AtomicI64 last_flush_millis
+    +region_id() RegionId
+    +scan() ChunkReaderImpl
+}
+class RegionMap {
+    -HashMap&lt;RegionId, MitoRegionRef&gt; regions
+}
+class ChunkReaderImpl
+
+class WorkerGroup {
+    -Vec~RegionWorker~ workers
+}
+class RegionWorker {
+    -RegionMap regions
+    -Sender sender
+    -JoinHandle handle
+}
+class RegionWorkerThread~LogStore~ {
+    -RegionMap regions
+    -Receiver receiver
+    -Wal~LogStore~ wal
+    -ObjectStore object_store
+    -MemtableBuilderRef memtable_builder
+    -FlushSchedulerRef~LogStore~ flush_scheduler
+    -FlushStrategy flush_strategy
+    -CompactionSchedulerRef~LogStore~ compaction_scheduler
+    -FilePurgerRef file_purger
+}
+class Wal~LogStore~ {
+    -LogStore log_store
+}
+class MitoConfig
+
+MitoEngine~LogStore~ o-- MitoConfig
+MitoEngine~LogStore~ o-- MitoRegion
+MitoEngine~LogStore~ o-- WorkerGroup
+MitoRegion o-- VersionControl
+MitoRegion -- ChunkReaderImpl
+WorkerGroup o-- RegionWorker
+RegionWorker o-- RegionMap
+RegionWorker -- RegionWorkerThread~LogStore~
+RegionWorkerThread~LogStore~ o-- RegionMap
+RegionWorkerThread~LogStore~ o-- Wal~LogStore~
+```
+
+We replace the `RegionWriter` with `RegionWorker` to process write requests and DDL requests.
+
+
+## Metadata
+We also merge region's metadata with table's metadata. It should make metadata much easier to maintain.
+```mermaid
+classDiagram
+class VersionControl {
+    -CowCell~Version~ version
+    -AtomicU64 committed_sequence
+}
+class Version {
+    -RegionMetadataRef metadata
+    -MemtableVersionRef memtables
+    -LevelMetasRef ssts
+    -SequenceNumber flushed_sequence
+    -ManifestVersion manifest_version
+}
+class MemtableVersion {
+    -MemtableRef mutable
+    -Vec~MemtableRef~ immutables
+    +mutable_memtable() MemtableRef
+    +immutable_memtables() &[MemtableRef]
+    +freeze_mutable(MemtableRef new_mutable) MemtableVersion
+}
+class LevelMetas {
+    -LevelMetaVec levels
+    -AccessLayerRef sst_layer
+    -FilePurgerRef file_purger
+    -Option~i64~ compaction_time_window
+}
+class LevelMeta {
+    -Level level
+    -HashMap&lt;FileId, FileHandle&gt; files
+}
+class FileHandle {
+    -FileMeta meta
+    -bool compacting
+    -AtomicBool deleted
+    -AccessLayerRef sst_layer
+    -FilePurgerRef file_purger
+}
+class FileMeta {
+    +RegionId region_id
+    +FileId file_id
+    +Option&lt;Timestamp, Timestamp&gt; time_range
+    +Level level
+    +u64 file_size
+}
+
+VersionControl o-- Version
+Version o-- RegionMetadata
+Version o-- MemtableVersion
+Version o-- LevelMetas
+LevelMetas o-- LevelMeta
+LevelMeta o-- FileHandle
+FileHandle o-- FileMeta
+
+class RegionMetadata {
+    +RegionId region_id
+    +VersionNumber version
+    +SchemaRef table_schema
+    +Vec~usize~ primary_key_indices
+    +Vec~usize~ value_indices
+    +ColumnId next_column_id
+    +TableOptions region_options
+    +DateTime~Utc~ created_on
+    +RegionSchemaRef region_schema
+}
+class RegionSchema {
+    -SchemaRef user_schema
+    -StoreSchemaRef store_schema
+    -ColumnsMetadataRef columns
+}
+class Schema
+class StoreSchema {
+    -Vec~ColumnMetadata~ columns
+    -SchemaRef schema
+    -usize row_key_end
+    -usize user_column_end
+}
+class ColumnsMetadata {
+    -Vec~ColumnMetadata~ columns
+    -HashMap&lt;String, usize&gt; name_to_col_index
+    -usize row_key_end
+    -usize timestamp_key_index
+    -usize user_column_end
+}
+class ColumnMetadata
+
+RegionMetadata o-- RegionSchema
+RegionMetadata o-- Schema
+RegionSchema o-- StoreSchema
+RegionSchema o-- Schema
+RegionSchema o-- ColumnsMetadata
+StoreSchema o-- ColumnsMetadata
+StoreSchema o-- Schema
+StoreSchema o-- ColumnMetadata
+ColumnsMetadata o-- ColumnMetadata
+```
+
+# Drawback
+This is a breaking change.
+
+# Future Work
+- Rename `TableEngine` to `RegionEngine`
+- Simplify schema relationship in the `mito` engine
+- Refactor the `Datanode` into a `RegionServer`.
--- a/docs/rfcs/2023-07-10-metric-engine.md
+++ b/docs/rfcs/2023-07-10-metric-engine.md
@@ -0,0 +1,202 @@
+---
+Feature Name: metric-engine
+Tracking Issue: TBD
+Date: 2023-07-10
+Author: "Ruihang Xia <waynestxia@gmail.com>"
+---
+
+# Summary
+
+A new metric engine that can significantly enhance our ability to handle the tremendous number of small tables in scenarios like Prometheus metrics, by leveraging a synthetic wide table that offers storage and metadata multiplexing capabilities over the existing engine.
+
+# Motivation
+
+The concept "Table" in GreptimeDB is a bit "heavy" compared to other time-series storage like Prometheus or VictoriaMetrics. This has lots of disadvantages in aspects from performance, footprint, and storage to cost.
+
+# Details
+
+## Top level description
+
+- User Interface
+
+    This feature will add a new type of storage engine. It might be available to be an option like `with ENGINE=mito` or an internal interface like auto create table on Prometheus remote write. From the user side, there is no difference from tables in mito engine. All the DDL like `CREATE`, `ALTER` and DML like `SELECT` should be supported.
+
+- Implementation Overview
+
+    This new engine doesn't re-implement low level components like file R/W etc. It's a wrapper layer over the existing mito engine, with extra storage and metadata multiplexing capabilities. I.e., it exposes multiple tables based on one mito engine table like this:
+	``` plaintext
+	   ┌───────────────┐ ┌───────────────┐ ┌───────────────┐
+	   │ Metric Engine │ │ Metric Engine │ │ Metric Engine │
+	   │   Table 1     │ │   Table 2     │ │   Table 3     │
+	   └───────────────┘ └───────────────┘ └───────────────┘
+	           ▲               ▲                   ▲
+	           │               │                   │
+	           └───────────────┼───────────────────┘
+	                           │
+	                 ┌─────────┴────────┐
+	                 │ Metric Region    │
+	                 │   Engine         │
+	                 │    ┌─────────────┤
+	                 │    │ Mito Region │
+	                 │    │   Engine    │
+	                 └────▲─────────────┘
+	                      │
+	                      │
+	                ┌─────┴───────────────┐
+	                │                     │
+	                │  Mito Engine Table  │
+	                │                     │
+	                └─────────────────────┘
+	```
+
+The following parts will describe these implementation details:
+    - How to route these metric region tables and how those tables are distributed
+    - How to maintain the schema and other metadata of the underlying mito engine table
+    - How to maintain the schema of metric engine table
+    - How the query goes
+
+## Routing
+
+Before this change, the region route rule was based on a group of partition keys. Relation of physical table to region is one-to-many.
+
+``` rust
+  pub struct PartitionDef {
+      partition_columns: Vec<String>,
+      partition_bounds: Vec<PartitionBound>,
+  }
+```
+
+And for metric engine tables, the key difference is we split the concept of "physical table" and "logical table". Like the previous ASCII chart, multiple logical tables are based on one physical table. The relationship of logical table to region becomes many-to-many. Thus, we must include the table name (of logical table) into partition rules.
+
+Considering that the partition/route interface is a generic map from a string array to a region id, all we need to do is insert the logical table name into the request:
+
+``` rust
+  fn route(request: Vec<String>) -> RegionId;
+```
+
+The next question is, where to do this conversion? The basic idea is to dispatch different routing behavior based on the engine type. Since we have all the necessary information in the frontend, it's a good place to do that, and we can leave the meta server untouched. The essential change is to associate the engine type with the route rule.
+
+## Physical Region Schema
+
+The idea "physical wide table" is to perform column-level multiplexing. I.e., map all logical columns to physical columns by their names.
+
+```
+   ┌────────────┐      ┌────────────┐         ┌────────────┐
+   │   Table 1  │      │   Table 2  │         │   Table 3  │
+   ├───┬────┬───┤      ├───┬────┬───┤         ├───┬────┬───┤
+   │C1 │ C2 │ C3│      │C1 │ C3 │ C5├──────┐  │C2 │ C4 │ C6│
+   └─┬─┴──┬─┴─┬─┘ ┌────┴───┴──┬─┴───┘      │  └─┬─┴──┬─┴─┬─┘
+     │    │   │   │           │            │    │    │   │
+     │    │   │   │           └──────────┐ │    │    │   │
+     │    │   │   │                      │ │    │    │   │
+     │    │   │   │  ┌─────────────────┐ │ │    │    │   │
+     │    │   │   │  │ Physical Table  │ │ │    │    │   │
+     │    │   │   │  ├──┬──┬──┬──┬──┬──┘ │ │    │    │   │
+     └────x───x───┴─►│C1│C2│C3│C4│C5│C6◄─┼─x────x────x───┘
+          │   │      └──┘▲─┘▲─┴─▲└─▲└──┘ │ │    │    │
+          │   │          │  │   │  │     │ │    │    │
+          ├───x──────────┘  ├───x──x─────┘ │    │    │
+          │   │             │   │  │       │    │    │
+          │   └─────────────┘   │  └───────┘    │    │
+          │                     │               │    │
+          └─────────────────────x───────────────┘    │
+                                │                    │
+                                └────────────────────┘
+```
+
+This approach is very straightforward but has one problem. It doesn't work when two columns have the same name but different semantic types (time index, tag or field) or data types. E.g., `CREATE TABLE t1 (c1 timestamp(3) TIME INDEX)` and `CREATE TABLE t2 (c1 STRING PRIMARY KEY)`.
+
+One possible workaround is to prefix each column with its data type and semantic type, like `_STRING_PK_c1`. However, considering the primary goal at present is to support data from monitoring metrics like Prometheus remote write, it's acceptable not to support this at first because data types are often simple and limited here.
+
+
+The next point is changing the physical table's schema. This is only needed when creating a new logical table or altering the existing table. Typically speaking, table creating and altering are explicit. We only need to emit an add column request to underlying physical table on processing logical table's DDL. GreptimeDB can create or alter table automatically on some protocols, but the internal logic is the same.
+
+Also for simplicity, we don't support shrinking the underlying table at first. This can be achieved by introducing mechanism on the physical column.
+
+The frontend does not need to keep the physical table's schema.
+
+## Metadata of physical regions
+
+Those metric engine regions need to store extra metadata like the schemas of logical tables or the names of all logical tables. That information is relatively simple and can be stored in a format like key-value pairs. For now, we have to use another physical mito region for metadata. This involves an issue with region scheduling. Since we don't have the ability to perform affinity scheduling, the initial version will just assume the data region and metadata region are in the same instance. See alternatives - other storage for physical region's metadata for possible future improvement.
+
+Here is the schema of metadata region and how we would use it. The `CREATE TABLE` clause of metadata region looks like the following. Notice that it wouldn't be actually created by SQL.
+
+``` sql
+  CREATE TABLE metadata(
+  	ts timestamp time index,
+    	key string primary key,
+    	value string
+  );
+```
+
+The `ts` field is just a placeholder -- for the constraint that a mito region must contain a time index field. It will always be `0`. The other two fields `key` and `value` will be used as a k-v storage. It contains two groups of keys:
+    - `__table_<TABLE_NAME>` is used for marking table existence. It doesn't have value.
+    - `__column_<TABLE_NAME>_<COLUMN_NAME>` is used for marking column existence; the value is the column's semantic type.
+
+## Physical region implementation
+
+This RFC proposes to add a new region implementation named "MetricRegion". As shown in the first chart, it's wrapped over the existing mito region. This section will describe the implementation details. Firstly, here is a chart showing what the region hierarchy looks like:
+
+```plaintext
+ ┌───────────────────────┐
+ │ Metric Region         │
+ │                       │
+ │   ┌────────┬──────────┤
+ │   │ Mito   │ Mito     │
+ │   │ Region │ Region   │
+ │   │ for    │ for      │
+ │   │ Data   │ Metadata │
+ └───┴────────┴──────────┘
+```
+
+All upper levels only see the Metric Region. E.g., Meta Server schedules on this region, or Frontend routes requests to this Metric Region's id. To be scheduled (open or close etc.), Metric Region needs to implement its own procedures. Most of those procedures can be simply assembled from underlying Mito Regions', but those related to data like alter or drop will have their own new logic.
+
+Another point is region id. Since the region id is used widely from meta server to persisted state, it's better to keep it unchanged. This means the two underlying regions can't share the same id; each needs its own. To achieve this, this RFC proposes a concept named "region id group". A region id group is a group of region ids that are bound for different purposes, like the two underlying regions here.
+
+This reserves the first 8 bits of the `u32` region number for grouping. Each group has one main id (the first one) and other sub ids (the rest non-zero ids). All components other than the region implementation itself are not aware of the existence of the region id group. They only see the main id. The region implementation is responsible for managing and using the region id group.
+
+```plaintext
+63                                  31         23                  0
+┌────────────────────────────────────┬──────────┬──────────────────┐
+│          Table Id(32)              │ Group(8) │ Region Number(24)│
+└────────────────────────────────────┴──────────┴──────────────────┘
+                                            Region Id(32)
+```
+
+## Routing in meta server
+
+From previous sections, we can conclude the following points about routing:
+- Each "logical table" has its own, globally unique table id.
+- Logical tables don't have their own physical regions; they share the same physical region with other logical tables.
+- The route rule of a logical table is a strict subset of the physical table's.
+
+To associate the logical table with the physical region, we need to specify the necessary information in the create table request. Specifically, the table type and its parent table. This requires changing our gRPC proto's definition. And once meta recognizes that the table to create is a logical table, it will use the parent table's region to create the route entry.
+
+And to reduce the cost of region failover (which needs to update the physical table route info), we'd better split the current route table structure into two parts:
+
+```rust
+region_route: Map<TableName, [RegionId]>,
+node_route: Map<RegionId, NodeId>,
+```
+
+By doing this on each failover the meta server only needs to update the second `node_route` map and leave the first one untouched.
+
+## Query
+
+Like other existing components, a user query always starts in the frontend. In the planning phase, frontend needs to fetch related schemas of the queried table. This part is the same. I.e., changes in this RFC don't affect components above the `Table` abstraction.
+
+# Alternatives
+
+## Other routing method
+
+We can also do this "special" route rule in the meta server. But there is no difference with the proposed method.
+
+## Other storage for physical region's metadata
+
+Once we have implemented the "region family" that allows multiple physical schemas to exist in one region, we can store the metadata and table data in one region.
+
+Before that, we can also let the `MetricRegion` hold a `KvBackend` to access the storage layer directly. But this breaks the abstraction in some way.
+
+# Drawbacks
+
+Since the physical storage is mixed together, it's hard to do fine-grained operations at the table level, like configuring TTL, memtable size or compaction strategy per table, or defining different partition rules for different tables. For scenarios like this, it's better to move the table out of the metric engine and "upgrade" it to a normal mito engine table. This requires a low-cost migration process. And we have to ensure data consistency during the migration, which may require an out-of-service period.
--- a/docs/schema-structs.md
+++ b/docs/schema-structs.md
@@ -0,0 +1,527 @@
+# Schema Structs
+
+# Common Schemas
+The `datatypes` crate defines the elementary schema struct to describe the metadata.
+
+## ColumnSchema
+[ColumnSchema](https://github.com/GreptimeTeam/greptimedb/blob/9fa871a3fad07f583dc1863a509414da393747f8/src/datatypes/src/schema/column_schema.rs#L36) represents the metadata of a column. It is equivalent to arrow's [Field](https://docs.rs/arrow/latest/arrow/datatypes/struct.Field.html) with additional metadata such as default constraint and whether the column is a time index. The time index is the column with a `TIME INDEX` constraint of a table. We can convert the `ColumnSchema` into an arrow `Field` and convert the `Field` back to the `ColumnSchema` without losing metadata.
+
+```rust
+pub struct ColumnSchema {
+    pub name: String,
+    pub data_type: ConcreteDataType,
+    is_nullable: bool,
+    is_time_index: bool,
+    default_constraint: Option<ColumnDefaultConstraint>,
+    metadata: Metadata,
+}
+```
+
+## Schema
+[Schema](https://github.com/GreptimeTeam/greptimedb/blob/9fa871a3fad07f583dc1863a509414da393747f8/src/datatypes/src/schema.rs#L38) is an ordered sequence of `ColumnSchema`. It is equivalent to arrow's [Schema](https://docs.rs/arrow/latest/arrow/datatypes/struct.Schema.html) with additional metadata including the index of the time index column and the version of this schema. Same as `ColumnSchema`, we can convert our `Schema` from/to arrow's `Schema`.
+
+```rust
+use arrow::datatypes::Schema as ArrowSchema;
+
+pub struct Schema {
+    column_schemas: Vec<ColumnSchema>,
+    name_to_index: HashMap<String, usize>,
+    arrow_schema: Arc<ArrowSchema>,
+    timestamp_index: Option<usize>,
+    version: u32,
+}
+
+pub type SchemaRef = Arc<Schema>;
+```
+
+We alias `Arc<Schema>` as `SchemaRef` since it is used frequently. Mostly, we use our `ColumnSchema` and `Schema` structs instead of Arrow's `Field` and `Schema` unless we need to invoke third-party libraries (like DataFusion or ArrowFlight) that rely on Arrow.
+
+## RawSchema
+`Schema` contains fields like a map from column names to their indices in the `ColumnSchema` sequences and a cached arrow `Schema`. We can construct these fields from the `ColumnSchema` sequences thus we don't want to serialize them. This is why we don't derive `Serialize` and `Deserialize` for `Schema`. We introduce a new struct [RawSchema](https://github.com/GreptimeTeam/greptimedb/blob/9fa871a3fad07f583dc1863a509414da393747f8/src/datatypes/src/schema/raw.rs#L24) which keeps all required fields of a `Schema` and derives the serialization traits. To serialize a `Schema`, we need to convert it into a `RawSchema` first and serialize the `RawSchema`.
+
+```rust
+pub struct RawSchema {
+    pub column_schemas: Vec<ColumnSchema>,
+    pub timestamp_index: Option<usize>,
+    pub version: u32,
+}
+```
+
+We want to keep the `Schema` simple and avoid putting too much business-related metadata in it as many different structs or traits rely on it.
+
+# Schema of the Table
+A table maintains its schema in [TableMeta](https://github.com/GreptimeTeam/greptimedb/blob/9fa871a3fad07f583dc1863a509414da393747f8/src/table/src/metadata.rs#L97).
+```rust
+pub struct TableMeta {
+    pub schema: SchemaRef,
+    pub primary_key_indices: Vec<usize>,
+    pub value_indices: Vec<usize>,
+    // ...
+}
+```
+
+The order of columns in `TableMeta::schema` is the same as the order specified in the `CREATE TABLE` statement which users use to create this table.
+
+The field `primary_key_indices` stores indices of primary key columns. The field `value_indices` records the indices of value columns (columns that are neither part of the primary key nor the time index; we sometimes call them field columns).
+
+Suppose we create a table with the following SQL
+```sql
+CREATE TABLE cpu (
+    ts TIMESTAMP,
+    host STRING,
+    usage_user DOUBLE,
+    usage_system DOUBLE,
+    datacenter STRING,
+    TIME INDEX (ts),
+    PRIMARY KEY(datacenter, host)) ENGINE=mito WITH(regions=1);
+```
+
+Then the table's `TableMeta` may look like this:
+```json
+{
+    "schema":{
+        "column_schemas":[
+            "ts",
+            "host",
+            "usage_user",
+            "usage_system",
+            "datacenter"
+        ],
+        "time_index":0,
+        "version":0
+    },
+    "primary_key_indices":[
+        4,
+        1
+    ],
+    "value_indices":[
+        2,
+        3
+    ]
+}
+```
+
+
+# Schemas of the storage engine
+We split a table into one or more units with the same schema and then store these units in the storage engine. Each unit is a region in the storage engine.
+
+The storage engine maintains schemas of regions in more complicated ways because it
+- adds internal columns that are invisible to users to store additional metadata for each row
+- provides a data model similar to the key-value model so it organizes columns in a different order
+- maintains additional metadata like column id or column family
+
+So the storage engine defines several schema structs:
+- RegionSchema
+- StoreSchema
+- ProjectedSchema
+
+## RegionSchema
+A [RegionSchema](https://github.com/GreptimeTeam/greptimedb/blob/9fa871a3fad07f583dc1863a509414da393747f8/src/storage/src/schema/region.rs#L37) describes the schema of a region.
+
+```rust
+pub struct RegionSchema {
+    user_schema: SchemaRef,
+    store_schema: StoreSchemaRef,
+    columns: ColumnsMetadataRef,
+}
+```
+
+Each region reserves some columns called `internal columns` for internal usage:
+- `__sequence`, sequence number of a row
+- `__op_type`, operation type of a row, such as `PUT` or `DELETE`
+- `__version`, user-specified version of a row, reserved but not used. We might remove this in the future
+
+The table engine can't see the `__sequence` and `__op_type` columns, so the `RegionSchema` itself maintains two internal schemas:
+- User schema, a `Schema` struct that doesn't have internal columns
+- Store schema, a `StoreSchema` struct that has internal columns
+
+The `ColumnsMetadata` struct keeps metadata about all columns, but most of the time we only need to use metadata in the user schema and store schema, so we just ignore it. We may remove this struct in the future.
+
+`RegionSchema` organizes columns in the following order:
+```
+key columns, timestamp, [__version,] value columns, __sequence, __op_type
+```
+
+We can ignore the `__version` column because it is disabled now:
+
+```
+key columns, timestamp, value columns, __sequence, __op_type
+```
+
+Key columns are columns of a table's primary key. Timestamp is the time index column. A region sorts all rows by key columns, timestamp, sequence, and op type.
+
+So the `RegionSchema` of our `cpu` table above looks like this:
+```json
+{
+    "user_schema":[
+        "datacenter",
+        "host",
+        "ts",
+        "usage_user",
+        "usage_system"
+    ],
+    "store_schema":[
+        "datacenter",
+        "host",
+        "ts",
+        "usage_user",
+        "usage_system",
+        "__sequence",
+        "__op_type"
+    ]
+}
+```
+
+## StoreSchema
+As described above, a [StoreSchema](https://github.com/GreptimeTeam/greptimedb/blob/9fa871a3fad07f583dc1863a509414da393747f8/src/storage/src/schema/store.rs#L36) is a schema that knows all internal columns.
+```rust
+struct StoreSchema {
+    columns: Vec<ColumnMetadata>,
+    schema: SchemaRef,
+    row_key_end: usize,
+    user_column_end: usize,
+}
+```
+
+The columns in the `columns` and `schema` fields have the same order. The `ColumnMetadata` has metadata like column id, column family id, and comment. The `StoreSchema` also stores this metadata in `StoreSchema::schema`, so we can convert between the `StoreSchema` and arrow's `Schema`. We use this feature to persist the `StoreSchema` in the SST since our SST format is `Parquet`, which can take arrow's `Schema` as its schema.
+
+The `StoreSchema` of the region above is similar to this:
+```json
+{
+    "schema":{
+        "column_schemas":[
+            "datacenter",
+            "host",
+            "ts",
+            "usage_user",
+            "usage_system",
+            "__sequence",
+            "__op_type"
+        ],
+        "time_index":2,
+        "version":0
+    },
+    "row_key_end":3,
+    "user_column_end":5
+}
+```
+
+The key and timestamp columns form row keys of rows. We put them together so we can use `row_key_end` to get indices of all row key columns. Similarly, we can use the `user_column_end` to get indices of all user columns (non-internal columns).
+```rust
+impl StoreSchema {
+    #[inline]
+    pub(crate) fn row_key_indices(&self) -> impl Iterator<Item = usize> {
+        0..self.row_key_end
+    }
+
+    #[inline]
+    pub(crate) fn value_indices(&self) -> impl Iterator<Item = usize> {
+        self.row_key_end..self.user_column_end
+    }
+}
+```
+
+Another useful feature of `StoreSchema` is that we ensure it always contains key columns, a timestamp column, and internal columns because we need them to perform merge, deduplication, and delete. Projection on `StoreSchema` only projects value columns.
+
+## ProjectedSchema
+To support arbitrary projection, we introduce the [ProjectedSchema](https://github.com/GreptimeTeam/greptimedb/blob/9fa871a3fad07f583dc1863a509414da393747f8/src/storage/src/schema/projected.rs#L106).
+```rust
+pub struct ProjectedSchema {
+    projection: Option<Projection>,
+    schema_to_read: StoreSchemaRef,
+    projected_user_schema: SchemaRef,
+}
+```
+
+We need to handle many cases while doing projection:
+- The columns' order of table and region is different
+- The projection can be in arbitrary order, e.g. `select usage_user, host from cpu` and `select host, usage_user from cpu` have different projection order
+- We support `ALTER TABLE` so data files may have different schemas.
+
+### Projection
+Let's take an example to see how projection works. Suppose we want to select `ts`, `usage_system` from the `cpu` table.
+
+```sql
+CREATE TABLE cpu (
+    ts TIMESTAMP,
+    host STRING,
+    usage_user DOUBLE,
+    usage_system DOUBLE,
+    datacenter STRING,
+    TIME INDEX (ts),
+    PRIMARY KEY(datacenter, host)) ENGINE=mito WITH(regions=1);
+
+select ts, usage_system from cpu;
+```
+
+The query engine uses the projection `[0, 3]` to scan the table. However, columns in the region have a different order, so the table engine adjusts the projection to `[2, 4]`.
+```json
+{
+    "user_schema":[
+        "datacenter",
+        "host",
+        "ts",
+        "usage_user",
+        "usage_system"
+    ]
+}
+```
+
+As you can see, the output order is still `[ts, usage_system]`. This is the schema users can see after projection so we call it `projected user schema`.
+
+But the storage engine also needs to read key columns, a timestamp column, and internal columns. So we maintain a `StoreSchema` after projection in the `ProjectedSchema`.
+
+The `Projection` struct is a helper struct to help compute the projected user schema and store schema.
+
+So we can construct the following `ProjectedSchema`:
+```json
+{
+    "schema_to_read":{
+        "schema":{
+            "column_schemas":[
+                "datacenter",
+                "host",
+                "ts",
+                "usage_system",
+                "__sequence",
+                "__op_type"
+            ],
+            "time_index":2,
+            "version":0
+        },
+        "row_key_end":3,
+        "user_column_end":4
+    },
+    "projected_user_schema":{
+        "column_schemas":[
+            "ts",
+            "usage_system"
+        ],
+        "time_index":0
+    }
+}
+```
+
+As you can see, `schema_to_read` doesn't contain the column `usage_user` that is not intended to be read (not in projection).
+
+### ReadAdapter
+As mentioned above, we can alter a table so the underlying files (SSTs) and memtables in the storage engine may have different schemas.
+
+To simplify the logic of `ProjectedSchema`, we handle the difference between schemas before projection (constructing the `ProjectedSchema`). We introduce [ReadAdapter](https://github.com/GreptimeTeam/greptimedb/blob/9fa871a3fad07f583dc1863a509414da393747f8/src/storage/src/schema/compat.rs#L90) that adapts rows with different source schemas to the same expected schema.
+
+So we can always use the current `RegionSchema` of the region to construct the `ProjectedSchema`, and then create a `ReadAdapter` for each memtable or SST.
+```rust
+#[derive(Debug)]
+pub struct ReadAdapter {
+    source_schema: StoreSchemaRef,
+    dest_schema: ProjectedSchemaRef,
+    indices_in_result: Vec<Option<usize>>,
+    is_source_needed: Vec<bool>,
+}
+```
+
+For each column required by `dest_schema`, `indices_in_result` stores the index of that column in the row read from the source memtable or SST. If the source row doesn't contain that column, the index is `None`.
+
+The field `is_source_needed` stores whether a column in the source memtable or SST is needed.
+
+Suppose we add a new column `usage_idle` to the table `cpu`.
+```sql
+ALTER TABLE cpu ADD COLUMN usage_idle DOUBLE;
+```
+
+The new `StoreSchema` becomes:
+```json
+{
+    "schema":{
+        "column_schemas":[
+            "datacenter",
+            "host",
+            "ts",
+            "usage_user",
+            "usage_system",
+            "usage_idle",
+            "__sequence",
+            "__op_type"
+        ],
+        "time_index":2,
+        "version":1
+    },
+    "row_key_end":3,
+    "user_column_end":6
+}
+```
+
+Note that we bump the version of the schema to 1.
+
+Suppose we want to select `ts`, `usage_system`, and `usage_idle`. While reading from the old schema, the storage engine creates a `ReadAdapter` like this:
+```json
+{
+    "source_schema":{
+        "schema":{
+            "column_schemas":[
+                "datacenter",
+                "host",
+                "ts",
+                "usage_user",
+                "usage_system",
+                "__sequence",
+                "__op_type"
+            ],
+            "time_index":2,
+            "version":0
+        },
+        "row_key_end":3,
+        "user_column_end":5
+    },
+    "dest_schema":{
+        "schema_to_read":{
+            "schema":{
+                "column_schemas":[
+                    "datacenter",
+                    "host",
+                    "ts",
+                    "usage_system",
+                    "usage_idle",
+                    "__sequence",
+                    "__op_type"
+                ],
+                "time_index":2,
+                "version":1
+            },
+            "row_key_end":3,
+            "user_column_end":5
+        },
+        "projected_user_schema":{
+            "column_schemas":[
+                "ts",
+                "usage_system",
+                "usage_idle"
+            ],
+            "time_index":0
+        }
+    },
+    "indices_in_result":[
+        0,
+        1,
+        2,
+        3,
+        null,
+        4,
+        5
+    ],
+    "is_source_needed":[
+        true,
+        true,
+        true,
+        false,
+        true,
+        true,
+        true
+    ]
+}
+```
+
+We don't need to read `usage_user`, so `is_source_needed[3]` is false. The old schema doesn't have the column `usage_idle`, so `indices_in_result[4]` is `null` and the `ReadAdapter` needs to insert a null column into the output row so that the output schema still contains `usage_idle`.
+
+The figure below shows the relationship between `RegionSchema`, `StoreSchema`, `ProjectedSchema`, and `ReadAdapter`.
+
+```text
+                   ┌──────────────────────────────┐
+                   │                              │
+                   │    ┌────────────────────┐    │
+                   │    │    store_schema    │    │
+                   │    │                    │    │
+                   │    │     StoreSchema    │    │
+                   │    │      version 1     │    │
+                   │    └────────────────────┘    │
+                   │                              │
+                   │    ┌────────────────────┐    │
+                   │    │     user_schema    │    │
+                   │    └────────────────────┘    │
+                   │                              │
+                   │         RegionSchema         │
+                   │                              │
+                   └──────────────┬───────────────┘
+                                  │
+                                  │
+                                  │
+                   ┌──────────────▼───────────────┐
+                   │                              │
+                   │ ┌──────────────────────────┐ │
+                   │ │     schema_to_read       │ │
+                   │ │                          │ │
+                   │ │  StoreSchema (projected) │ │
+                   │ │       version 1          │ │
+                   │ └──────────────────────────┘ │
+               ┌───┤                              ├───┐
+               │   │ ┌──────────────────────────┐ │   │
+               │   │ │  projected_user_schema   │ │   │
+               │   │ └──────────────────────────┘ │   │
+               │   │                              │   │
+               │   │       ProjectedSchema        │   │
+  dest schema  │   └──────────────────────────────┘   │   dest schema
+               │                                      │
+               │                                      │
+        ┌──────▼───────┐                      ┌───────▼──────┐
+        │              │                      │              │
+        │  ReadAdapter │                      │  ReadAdapter │
+        │              │                      │              │
+        └──────▲───────┘                      └───────▲──────┘
+               │                                      │
+               │                                      │
+source schema  │                                      │  source schema
+               │                                      │
+       ┌───────┴─────────┐                   ┌────────┴────────┐
+       │                 │                   │                 │
+       │ ┌─────────────┐ │                   │ ┌─────────────┐ │
+       │ │             │ │                   │ │             │ │
+       │ │ StoreSchema │ │                   │ │ StoreSchema │ │
+       │ │             │ │                   │ │             │ │
+       │ │  version 0  │ │                   │ │  version 1  │ │
+       │ │             │ │                   │ │             │ │
+       │ └─────────────┘ │                   │ └─────────────┘ │
+       │                 │                   │                 │
+       │      SST 0      │                   │      SST 1      │
+       │                 │                   │                 │
+       └─────────────────┘                   └─────────────────┘
+```
+
+# Conversion
+This figure shows the conversion between schemas:
+```text
+              ┌─────────────┐     schema                      From             ┌─────────────┐
+              │             ├──────────────────┐  ┌────────────────────────────►             │
+              │  TableMeta  │                  │  │                            │  RawSchema  │
+              │             │                  │  │  ┌─────────────────────────┤             │
+              └─────────────┘                  │  │  │        TryFrom          └─────────────┘
+                                               │  │  │
+                                               │  │  │
+                                               │  │  │
+                                               │  │  │
+                                               │  │  │
+    ┌───────────────────┐                ┌─────▼──┴──▼──┐   arrow_schema()    ┌─────────────────┐
+    │                   │                │              ├─────────────────────►                 │
+    │  ColumnsMetadata  │          ┌─────►    Schema    │                     │   ArrowSchema   ├──┐
+    │                   │          │     │              ◄─────────────────────┤                 │  │
+    └────┬───────────▲──┘          │     └───▲───▲──────┘       TryFrom       └─────────────────┘  │
+         │           │             │         │   │                                                 │
+         │           │             │         │   └────────────────────────────────────────┐        │
+         │           │             │         │                                            │        │
+         │   columns │    user_schema()      │                                            │        │
+         │           │             │         │ projected_user_schema()                 schema()    │
+         │           │             │         │                                            │        │
+         │       ┌───┴─────────────┴─┐       │                 ┌────────────────────┐     │        │
+columns  │       │                   │       └─────────────────┤                    │     │        │  TryFrom
+         │       │    RegionSchema   │                         │   ProjectedSchema  │     │        │
+         │       │                   ├─────────────────────────►                    │     │        │
+         │       └─────────────────┬─┘  ProjectedSchema::new() └──────────────────┬─┘     │        │
+         │                         │                                              │       │        │
+         │                         │                                              │       │        │
+         │                         │                                              │       │        │
+         │                         │                                              │       │        │
+    ┌────▼────────────────────┐    │               store_schema()            ┌────▼───────┴──┐     │
+    │                         │    └─────────────────────────────────────────►               │     │
+    │   Vec<ColumnMetadata>   │                                              │  StoreSchema  ◄─────┘
+    │                         ◄──────────────────────────────────────────────┤               │
+    └─────────────────────────┘                     columns                  └───────────────┘
+```
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,2 +1,2 @@
 [toolchain]
-channel = "nightly-2023-02-26"
+channel = "nightly-2023-05-03"
--- a/scripts/fetch-dashboard-assets.sh
+++ b/scripts/fetch-dashboard-assets.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+# This script is used to download built dashboard assets from the "GreptimeTeam/dashboard" repository.
+
+set -e
+
+declare -r SCRIPT_DIR=$(cd $(dirname ${0}) >/dev/null 2>&1 && pwd)
+declare -r ROOT_DIR=$(dirname ${SCRIPT_DIR})
+declare -r STATIC_DIR="$ROOT_DIR/src/servers/dashboard"
+OUT_DIR="${1:-$SCRIPT_DIR}"
+
+RELEASE_VERSION="$(cat $STATIC_DIR/VERSION)"
+
+echo "Downloading assets to dir: $OUT_DIR"
+cd $OUT_DIR
+# Download the SHA256 checksum attached to the release. To verify the integrity
+# of the download, this checksum will be used to check the download tar file
+# containing the built dashboard assets.
+curl -Ls https://github.com/GreptimeTeam/dashboard/releases/download/$RELEASE_VERSION/sha256.txt --output sha256.txt
+
+# Download the tar file containing the built dashboard assets.
+curl -L https://github.com/GreptimeTeam/dashboard/releases/download/$RELEASE_VERSION/build.tar.gz --output build.tar.gz
+
+# Verify the checksums match; exit if they don't.
+case "$(uname -s)" in
+    FreeBSD | Darwin)
+        echo "$(cat sha256.txt)" | shasum --algorithm 256 --check \
+            || { echo "Checksums did not match for downloaded dashboard assets!"; exit 1; } ;;
+    Linux)
+        echo "$(cat sha256.txt)" | sha256sum --check -- \
+            || { echo "Checksums did not match for downloaded dashboard assets!"; exit 1; } ;;
+    *)
+        echo "The '$(uname -s)' operating system is not supported as a build host for the dashboard" >&2
+        exit 1
+esac
+
+# Extract the assets and clean up.
+tar -xzf build.tar.gz -C "$STATIC_DIR"
+rm sha256.txt
+rm build.tar.gz
+
+echo "Successfully download dashboard assets to $STATIC_DIR"
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -51,13 +51,26 @@ get_os_type
 get_arch_type

 if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
-    echo "Downloading ${BIN}, OS: ${OS_TYPE}, Arch: ${ARCH_TYPE}, Version: ${VERSION}"
-
+    # Use the latest nightly version.
    if [ "${VERSION}" = "latest" ]; then
-        wget "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/latest/download/${BIN}-${OS_TYPE}-${ARCH_TYPE}.tgz"
-    else
-        wget "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${BIN}-${OS_TYPE}-${ARCH_TYPE}.tgz"
+        VERSION=$(curl -s -XGET "https://api.github.com/repos/${GITHUB_ORG}/${GITHUB_REPO}/releases" | grep tag_name | grep nightly | cut -d: -f 2 | sed 's/.*"\(.*\)".*/\1/' | uniq | sort -r | head -n 1)
+        if [ -z "${VERSION}" ]; then
+            echo "Failed to get the latest version."
+            exit 1
+        fi
    fi

-    tar xvf ${BIN}-${OS_TYPE}-${ARCH_TYPE}.tgz && rm ${BIN}-${OS_TYPE}-${ARCH_TYPE}.tgz && echo "Run './${BIN} --help' to get started"
+    echo "Downloading ${BIN}, OS: ${OS_TYPE}, Arch: ${ARCH_TYPE}, Version: ${VERSION}"
+    PACKAGE_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}.tar.gz"
+
+    if [ -n "${PACKAGE_NAME}" ]; then
+      wget "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"
+
+      # Extract the binary and clean the rest.
+      tar xvf "${PACKAGE_NAME}" && \
+      mv "${PACKAGE_NAME%.tar.gz}/${BIN}" "${PWD}" && \
+      rm -r "${PACKAGE_NAME}" && \
+      rm -r "${PACKAGE_NAME%.tar.gz}" && \
+      echo "Run './${BIN} --help' to get started"
+    fi
 fi
--- a/src/api/Cargo.toml
+++ b/src/api/Cargo.toml
@@ -5,15 +5,14 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-arrow-flight.workspace = true
 common-base = { path = "../common/base" }
 common-error = { path = "../common/error" }
 common-time = { path = "../common/time" }
 datatypes = { path = "../datatypes" }
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "3a715150563b89d5dfc81a5838eac1f66a5658a1" }
+greptime-proto.workspace = true
 prost.workspace = true
 snafu = { version = "0.7", features = ["backtraces"] }
 tonic.workspace = true

 [build-dependencies]
-tonic-build = "0.8"
+tonic-build = "0.9"
--- a/src/api/src/error.rs
+++ b/src/api/src/error.rs
@@ -15,10 +15,10 @@
 use std::any::Any;

 use common_error::ext::ErrorExt;
-use common_error::prelude::StatusCode;
+use common_error::status_code::StatusCode;
 use datatypes::prelude::ConcreteDataType;
 use snafu::prelude::*;
-use snafu::{Backtrace, ErrorCompat};
+use snafu::Location;

 pub type Result<T> = std::result::Result<T, Error>;

@@ -26,12 +26,12 @@ pub type Result<T> = std::result::Result<T, Error>;
 #[snafu(visibility(pub))]
 pub enum Error {
    #[snafu(display("Unknown proto column datatype: {}", datatype))]
-    UnknownColumnDataType { datatype: i32, backtrace: Backtrace },
+    UnknownColumnDataType { datatype: i32, location: Location },

    #[snafu(display("Failed to create column datatype from {:?}", from))]
    IntoColumnDataType {
        from: ConcreteDataType,
-        backtrace: Backtrace,
+        location: Location,
    },

    #[snafu(display(
@@ -41,7 +41,7 @@ pub enum Error {
    ))]
    ConvertColumnDefaultConstraint {
        column: String,
-        #[snafu(backtrace)]
+        location: Location,
        source: datatypes::error::Error,
    },

@@ -52,7 +52,7 @@ pub enum Error {
    ))]
    InvalidColumnDefaultConstraint {
        column: String,
-        #[snafu(backtrace)]
+        location: Location,
        source: datatypes::error::Error,
    },
 }
@@ -66,9 +66,6 @@ impl ErrorExt for Error {
            | Error::InvalidColumnDefaultConstraint { source, .. } => source.status_code(),
        }
    }
-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }

    fn as_any(&self) -> &dyn Any {
        self
--- a/src/api/src/helper.rs
+++ b/src/api/src/helper.rs
@@ -15,9 +15,13 @@
 use common_base::BitVec;
 use common_time::timestamp::TimeUnit;
 use datatypes::prelude::ConcreteDataType;
-use datatypes::types::TimestampType;
+use datatypes::types::{TimeType, TimestampType};
 use datatypes::value::Value;
 use datatypes::vectors::VectorRef;
+use greptime_proto::v1::ddl_request::Expr;
+use greptime_proto::v1::greptime_request::Request;
+use greptime_proto::v1::query_request::Query;
+use greptime_proto::v1::{DdlRequest, QueryRequest};
 use snafu::prelude::*;

 use crate::error::{self, Result};
@@ -67,6 +71,10 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
            ColumnDataType::TimestampNanosecond => {
                ConcreteDataType::timestamp_nanosecond_datatype()
            }
+            ColumnDataType::TimeSecond => ConcreteDataType::time_second_datatype(),
+            ColumnDataType::TimeMillisecond => ConcreteDataType::time_millisecond_datatype(),
+            ColumnDataType::TimeMicrosecond => ConcreteDataType::time_microsecond_datatype(),
+            ColumnDataType::TimeNanosecond => ConcreteDataType::time_nanosecond_datatype(),
        }
    }
 }
@@ -91,13 +99,20 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
            ConcreteDataType::String(_) => ColumnDataType::String,
            ConcreteDataType::Date(_) => ColumnDataType::Date,
            ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
-            ConcreteDataType::Timestamp(unit) => match unit {
+            ConcreteDataType::Timestamp(t) => match t {
                TimestampType::Second(_) => ColumnDataType::TimestampSecond,
                TimestampType::Millisecond(_) => ColumnDataType::TimestampMillisecond,
                TimestampType::Microsecond(_) => ColumnDataType::TimestampMicrosecond,
                TimestampType::Nanosecond(_) => ColumnDataType::TimestampNanosecond,
            },
-            ConcreteDataType::Null(_)
+            ConcreteDataType::Time(t) => match t {
+                TimeType::Second(_) => ColumnDataType::TimeSecond,
+                TimeType::Millisecond(_) => ColumnDataType::TimeMillisecond,
+                TimeType::Microsecond(_) => ColumnDataType::TimeMicrosecond,
+                TimeType::Nanosecond(_) => ColumnDataType::TimeNanosecond,
+            },
+            ConcreteDataType::Interval(_)
+            | ConcreteDataType::Null(_)
            | ConcreteDataType::List(_)
            | ConcreteDataType::Dictionary(_) => {
                return error::IntoColumnDataTypeSnafu { from: datatype }.fail()
@@ -185,6 +200,22 @@ pub fn values_with_capacity(datatype: ColumnDataType, capacity: usize) -> Values
            ts_nanosecond_values: Vec::with_capacity(capacity),
            ..Default::default()
        },
+        ColumnDataType::TimeSecond => Values {
+            time_second_values: Vec::with_capacity(capacity),
+            ..Default::default()
+        },
+        ColumnDataType::TimeMillisecond => Values {
+            time_millisecond_values: Vec::with_capacity(capacity),
+            ..Default::default()
+        },
+        ColumnDataType::TimeMicrosecond => Values {
+            time_microsecond_values: Vec::with_capacity(capacity),
+            ..Default::default()
+        },
+        ColumnDataType::TimeNanosecond => Values {
+            time_nanosecond_values: Vec::with_capacity(capacity),
+            ..Default::default()
+        },
    }
 }

@@ -219,17 +250,58 @@ pub fn push_vals(column: &mut Column, origin_count: usize, vector: VectorRef) {
            TimeUnit::Microsecond => values.ts_microsecond_values.push(val.value()),
            TimeUnit::Nanosecond => values.ts_nanosecond_values.push(val.value()),
        },
-        Value::List(_) => unreachable!(),
+        Value::Time(val) => match val.unit() {
+            TimeUnit::Second => values.time_second_values.push(val.value()),
+            TimeUnit::Millisecond => values.time_millisecond_values.push(val.value()),
+            TimeUnit::Microsecond => values.time_microsecond_values.push(val.value()),
+            TimeUnit::Nanosecond => values.time_nanosecond_values.push(val.value()),
+        },
+        Value::Interval(_) | Value::List(_) => unreachable!(),
    });
    column.null_mask = null_mask.into_vec();
 }

+/// Returns a static label for the [Request] kind, e.g. `"inserts"`, `"query.sql"` or `"ddl.alter"`.
+pub fn request_type(request: &Request) -> &'static str {
+    match request {
+        Request::Inserts(_) => "inserts",
+        Request::Query(query_req) => query_request_type(query_req),
+        Request::Ddl(ddl_req) => ddl_request_type(ddl_req),
+        Request::Delete(_) => "delete",
+    }
+}
+
+/// Returns the `"query.*"` label for the [QueryRequest], or `"query.empty"` when no query is set.
+fn query_request_type(request: &QueryRequest) -> &'static str {
+    match request.query {
+        Some(Query::Sql(_)) => "query.sql",
+        Some(Query::LogicalPlan(_)) => "query.logical_plan",
+        Some(Query::PromRangeQuery(_)) => "query.prom_range",
+        None => "query.empty",
+    }
+}
+
+/// Returns the `"ddl.*"` label for the [DdlRequest], or `"ddl.empty"` when no expression is set.
+fn ddl_request_type(request: &DdlRequest) -> &'static str {
+    match request.expr {
+        Some(Expr::CreateDatabase(_)) => "ddl.create_database",
+        Some(Expr::CreateTable(_)) => "ddl.create_table",
+        Some(Expr::Alter(_)) => "ddl.alter",
+        Some(Expr::DropTable(_)) => "ddl.drop_table",
+        Some(Expr::FlushTable(_)) => "ddl.flush_table",
+        Some(Expr::CompactTable(_)) => "ddl.compact_table",
+        Some(Expr::TruncateTable(_)) => "ddl.truncate_table",
+        None => "ddl.empty",
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use std::sync::Arc;

    use datatypes::vectors::{
-        BooleanVector, TimestampMicrosecondVector, TimestampMillisecondVector,
+        BooleanVector, TimeMicrosecondVector, TimeMillisecondVector, TimeNanosecondVector,
+        TimeSecondVector, TimestampMicrosecondVector, TimestampMillisecondVector,
        TimestampNanosecondVector, TimestampSecondVector,
    };

@@ -292,6 +364,10 @@ mod tests {
        let values = values_with_capacity(ColumnDataType::TimestampMillisecond, 2);
        let values = values.ts_millisecond_values;
        assert_eq!(2, values.capacity());
+
+        let values = values_with_capacity(ColumnDataType::TimeMillisecond, 2);
+        let values = values.time_millisecond_values;
+        assert_eq!(2, values.capacity());
    }

    #[test]
@@ -360,6 +436,10 @@ mod tests {
            ConcreteDataType::timestamp_millisecond_datatype(),
            ColumnDataTypeWrapper(ColumnDataType::TimestampMillisecond).into()
        );
+        assert_eq!(
+            ConcreteDataType::time_datatype(TimeUnit::Millisecond),
+            ColumnDataTypeWrapper(ColumnDataType::TimeMillisecond).into()
+        );
    }

    #[test]
@@ -488,9 +568,50 @@ mod tests {
        );
    }

+    #[test]
+    fn test_column_put_time_values() {
+        let mut column = Column {
+            column_name: "test".to_string(),
+            semantic_type: 0,
+            values: Some(Values {
+                ..Default::default()
+            }),
+            null_mask: vec![],
+            datatype: 0,
+        };
+        // Push one vector per time unit into the same column; each must land in its own `time_*_values` list.
+        let vector = Arc::new(TimeNanosecondVector::from_vec(vec![1, 2, 3]));
+        push_vals(&mut column, 3, vector);
+        assert_eq!(
+            vec![1, 2, 3],
+            column.values.as_ref().unwrap().time_nanosecond_values
+        );
+
+        let vector = Arc::new(TimeMillisecondVector::from_vec(vec![4, 5, 6]));
+        push_vals(&mut column, 3, vector);
+        assert_eq!(
+            vec![4, 5, 6],
+            column.values.as_ref().unwrap().time_millisecond_values
+        );
+
+        let vector = Arc::new(TimeMicrosecondVector::from_vec(vec![7, 8, 9]));
+        push_vals(&mut column, 3, vector);
+        assert_eq!(
+            vec![7, 8, 9],
+            column.values.as_ref().unwrap().time_microsecond_values
+        );
+
+        let vector = Arc::new(TimeSecondVector::from_vec(vec![10, 11, 12]));
+        push_vals(&mut column, 3, vector);
+        assert_eq!(
+            vec![10, 11, 12],
+            column.values.as_ref().unwrap().time_second_values
+        );
+    }
+
    #[test]
    fn test_column_put_vector() {
-        use crate::v1::column::SemanticType;
+        use crate::v1::SemanticType;
        // Some(false), None, Some(true), Some(true)
        let mut column = Column {
            column_name: "test".to_string(),
--- a/src/api/src/lib.rs
+++ b/src/api/src/lib.rs
@@ -15,7 +15,7 @@
 pub mod error;
 pub mod helper;

-pub mod prometheus {
+pub mod prom_store {
    pub mod remote {
        pub use greptime_proto::prometheus::remote::*;
    }
@@ -23,4 +23,5 @@ pub mod prometheus {

 pub mod v1;

+pub use greptime_proto;
 pub use prost::DecodeError;
--- a/src/catalog/Cargo.toml
+++ b/src/catalog/Cargo.toml
@@ -4,15 +4,19 @@ version.workspace = true
 edition.workspace = true
 license.workspace = true

+[features]
+testing = []
+
 [dependencies]
 api = { path = "../api" }
 arc-swap = "1.0"
+arrow-schema.workspace = true
 async-stream.workspace = true
 async-trait = "0.1"
-backoff = { version = "0.4", features = ["tokio"] }
 common-catalog = { path = "../common/catalog" }
 common-error = { path = "../common/error" }
 common-grpc = { path = "../common/grpc" }
+common-meta = { path = "../common/meta" }
 common-query = { path = "../common/query" }
 common-recordbatch = { path = "../common/recordbatch" }
 common-runtime = { path = "../common/runtime" }
@@ -23,21 +27,24 @@ datafusion.workspace = true
 datatypes = { path = "../datatypes" }
 futures = "0.3"
 futures-util.workspace = true
-lazy_static = "1.4"
+lazy_static.workspace = true
 meta-client = { path = "../meta-client" }
+metrics.workspace = true
+moka = { version = "0.11", features = ["future"] }
 parking_lot = "0.12"
-regex = "1.6"
+regex.workspace = true
 serde = "1.0"
 serde_json = "1.0"
 session = { path = "../session" }
 snafu = { version = "0.7", features = ["backtraces"] }
-storage = { path = "../storage" }
+store-api = { path = "../store-api" }
 table = { path = "../table" }
 tokio.workspace = true

 [dev-dependencies]
-common-test-util = { path = "../common/test-util" }
+catalog = { path = ".", features = ["testing"] }
 chrono.workspace = true
+common-test-util = { path = "../common/test-util" }
 log-store = { path = "../log-store" }
 mito = { path = "../mito", features = ["test"] }
 object-store = { path = "../object-store" }
--- a/src/catalog/src/error.rs
+++ b/src/catalog/src/error.rs
@@ -16,25 +16,34 @@ use std::any::Any;
 use std::fmt::Debug;

 use common_error::ext::{BoxedError, ErrorExt};
-use common_error::prelude::{Snafu, StatusCode};
+use common_error::status_code::StatusCode;
 use datafusion::error::DataFusionError;
 use datatypes::prelude::ConcreteDataType;
-use snafu::{Backtrace, ErrorCompat};
+use snafu::{Location, Snafu};
+use tokio::task::JoinError;

 use crate::DeregisterTableRequest;

 #[derive(Debug, Snafu)]
 #[snafu(visibility(pub))]
 pub enum Error {
+    #[snafu(display(
+        "Failed to re-compile script due to internal error, source: {}",
+        source
+    ))]
+    CompileScriptInternal {
+        location: Location,
+        source: BoxedError,
+    },
    #[snafu(display("Failed to open system catalog table, source: {}", source))]
    OpenSystemCatalog {
-        #[snafu(backtrace)]
+        location: Location,
        source: table::error::Error,
    },

    #[snafu(display("Failed to create system catalog table, source: {}", source))]
    CreateSystemCatalog {
-        #[snafu(backtrace)]
+        location: Location,
        source: table::error::Error,
    },

@@ -45,12 +54,12 @@ pub enum Error {
    ))]
    CreateTable {
        table_info: String,
-        #[snafu(backtrace)]
+        location: Location,
        source: table::error::Error,
    },

    #[snafu(display("System catalog is not valid: {}", msg))]
-    SystemCatalog { msg: String, backtrace: Backtrace },
+    SystemCatalog { msg: String, location: Location },

    #[snafu(display(
        "System catalog table type mismatch, expected: binary, found: {:?}",
@@ -58,77 +67,93 @@ pub enum Error {
    ))]
    SystemCatalogTypeMismatch {
        data_type: ConcreteDataType,
-        backtrace: Backtrace,
+        location: Location,
    },

    #[snafu(display("Invalid system catalog entry type: {:?}", entry_type))]
    InvalidEntryType {
        entry_type: Option<u8>,
-        backtrace: Backtrace,
+        location: Location,
    },

    #[snafu(display("Invalid system catalog key: {:?}", key))]
    InvalidKey {
        key: Option<String>,
-        backtrace: Backtrace,
+        location: Location,
    },

    #[snafu(display("Catalog value is not present"))]
-    EmptyValue { backtrace: Backtrace },
+    EmptyValue { location: Location },

    #[snafu(display("Failed to deserialize value, source: {}", source))]
    ValueDeserialize {
        source: serde_json::error::Error,
-        backtrace: Backtrace,
+        location: Location,
+    },
+
+    #[snafu(display("Table engine not found: {}, source: {}", engine_name, source))]
+    TableEngineNotFound {
+        engine_name: String,
+        location: Location,
+        source: table::error::Error,
    },

    #[snafu(display("Cannot find catalog by name: {}", catalog_name))]
    CatalogNotFound {
        catalog_name: String,
-        backtrace: Backtrace,
+        location: Location,
    },

    #[snafu(display("Cannot find schema {} in catalog {}", schema, catalog))]
    SchemaNotFound {
        catalog: String,
        schema: String,
-        backtrace: Backtrace,
+        location: Location,
    },

    #[snafu(display("Table `{}` already exists", table))]
-    TableExists { table: String, backtrace: Backtrace },
+    TableExists { table: String, location: Location },

-    #[snafu(display("Table `{}` not exist", table))]
-    TableNotExist { table: String, backtrace: Backtrace },
+    #[snafu(display("Table not found: {}", table))]
+    TableNotExist { table: String, location: Location },

    #[snafu(display("Schema {} already exists", schema))]
-    SchemaExists {
-        schema: String,
-        backtrace: Backtrace,
-    },
+    SchemaExists { schema: String, location: Location },

    #[snafu(display("Operation {} not implemented yet", operation))]
    Unimplemented {
        operation: String,
-        backtrace: Backtrace,
+        location: Location,
    },

+    #[snafu(display("Operation {} not supported", op))]
+    NotSupported { op: String, location: Location },
+
    #[snafu(display("Failed to open table, table info: {}, source: {}", table_info, source))]
    OpenTable {
        table_info: String,
-        #[snafu(backtrace)]
+        location: Location,
        source: table::error::Error,
    },

+    #[snafu(display("Failed to open table in parallel, source: {}", source))]
+    ParallelOpenTable { source: JoinError },
+
    #[snafu(display("Table not found while opening table, table info: {}", table_info))]
    TableNotFound {
        table_info: String,
-        backtrace: Backtrace,
+        location: Location,
    },

    #[snafu(display("Failed to read system catalog table records"))]
    ReadSystemCatalog {
-        #[snafu(backtrace)]
+        location: Location,
+        source: common_recordbatch::error::Error,
+    },
+
+    #[snafu(display("Failed to create recordbatch, source: {}", source))]
+    CreateRecordBatch {
+        location: Location,
        source: common_recordbatch::error::Error,
    },

@@ -137,7 +162,7 @@ pub enum Error {
        source
    ))]
    InsertCatalogRecord {
-        #[snafu(backtrace)]
+        location: Location,
        source: table::error::Error,
    },

@@ -148,62 +173,82 @@ pub enum Error {
    ))]
    DeregisterTable {
        request: DeregisterTableRequest,
-        #[snafu(backtrace)]
+        location: Location,
        source: table::error::Error,
    },

    #[snafu(display("Illegal catalog manager state: {}", msg))]
-    IllegalManagerState { backtrace: Backtrace, msg: String },
+    IllegalManagerState { location: Location, msg: String },

    #[snafu(display("Failed to scan system catalog table, source: {}", source))]
    SystemCatalogTableScan {
-        #[snafu(backtrace)]
+        location: Location,
        source: table::error::Error,
    },

-    #[snafu(display("Failure during SchemaProvider operation, source: {}", source))]
-    SchemaProviderOperation {
-        #[snafu(backtrace)]
-        source: BoxedError,
-    },
-
    #[snafu(display("{source}"))]
    Internal {
-        #[snafu(backtrace)]
+        location: Location,
        source: BoxedError,
    },

+    #[snafu(display(
+        "Failed to upgrade weak catalog manager reference. location: {}",
+        location
+    ))]
+    UpgradeWeakCatalogManagerRef { location: Location },
+
    #[snafu(display("Failed to execute system catalog table scan, source: {}", source))]
    SystemCatalogTableScanExec {
-        #[snafu(backtrace)]
+        location: Location,
        source: common_query::error::Error,
    },
+
    #[snafu(display("Cannot parse catalog value, source: {}", source))]
    InvalidCatalogValue {
-        #[snafu(backtrace)]
+        location: Location,
        source: common_catalog::error::Error,
    },

    #[snafu(display("Failed to perform metasrv operation, source: {}", source))]
    MetaSrv {
-        #[snafu(backtrace)]
+        location: Location,
        source: meta_client::error::Error,
    },

    #[snafu(display("Invalid table info in catalog, source: {}", source))]
    InvalidTableInfoInCatalog {
-        #[snafu(backtrace)]
+        location: Location,
        source: datatypes::error::Error,
    },

-    #[snafu(display("Failed to serialize or deserialize catalog entry: {}", source))]
-    CatalogEntrySerde {
-        #[snafu(backtrace)]
-        source: common_catalog::error::Error,
-    },
-
    #[snafu(display("Illegal access to catalog: {} and schema: {}", catalog, schema))]
    QueryAccessDenied { catalog: String, schema: String },
+
+    #[snafu(display("Invalid system table definition: {err_msg}"))]
+    InvalidSystemTableDef { err_msg: String, location: Location },
+
+    #[snafu(display("{}: {}", msg, source))]
+    Datafusion {
+        msg: String,
+        source: DataFusionError,
+        location: Location,
+    },
+
+    #[snafu(display("Table schema mismatch, source: {}", source))]
+    TableSchemaMismatch {
+        location: Location,
+        source: table::error::Error,
+    },
+
+    #[snafu(display("A generic error has occurred, msg: {}", msg))]
+    Generic { msg: String, location: Location },
+
+    #[snafu(display("Table metadata manager error: {}", source))]
+    TableMetadataManager {
+        source: common_meta::error::Error,
+        location: Location,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -216,47 +261,53 @@ impl ErrorExt for Error {
            | Error::TableNotFound { .. }
            | Error::IllegalManagerState { .. }
            | Error::CatalogNotFound { .. }
-            | Error::InvalidEntryType { .. } => StatusCode::Unexpected,
+            | Error::InvalidEntryType { .. }
+            | Error::InvalidSystemTableDef { .. }
+            | Error::ParallelOpenTable { .. } => StatusCode::Unexpected,

            Error::SystemCatalog { .. }
            | Error::EmptyValue { .. }
            | Error::ValueDeserialize { .. } => StatusCode::StorageUnavailable,

-            Error::SystemCatalogTypeMismatch { .. } => StatusCode::Internal,
+            Error::Generic { .. }
+            | Error::SystemCatalogTypeMismatch { .. }
+            | Error::UpgradeWeakCatalogManagerRef { .. } => StatusCode::Internal,

-            Error::ReadSystemCatalog { source, .. } => source.status_code(),
-            Error::InvalidCatalogValue { source, .. } | Error::CatalogEntrySerde { source } => {
+            Error::ReadSystemCatalog { source, .. } | Error::CreateRecordBatch { source, .. } => {
                source.status_code()
            }
+            Error::InvalidCatalogValue { source, .. } => source.status_code(),

            Error::TableExists { .. } => StatusCode::TableAlreadyExists,
            Error::TableNotExist { .. } => StatusCode::TableNotFound,
-            Error::SchemaExists { .. } => StatusCode::InvalidArguments,
+            Error::SchemaExists { .. } | Error::TableEngineNotFound { .. } => {
+                StatusCode::InvalidArguments
+            }

            Error::OpenSystemCatalog { source, .. }
            | Error::CreateSystemCatalog { source, .. }
            | Error::InsertCatalogRecord { source, .. }
            | Error::OpenTable { source, .. }
            | Error::CreateTable { source, .. }
-            | Error::DeregisterTable { source, .. } => source.status_code(),
+            | Error::DeregisterTable { source, .. }
+            | Error::TableSchemaMismatch { source, .. } => source.status_code(),

            Error::MetaSrv { source, .. } => source.status_code(),
-            Error::SystemCatalogTableScan { source } => source.status_code(),
-            Error::SystemCatalogTableScanExec { source } => source.status_code(),
-            Error::InvalidTableInfoInCatalog { source } => source.status_code(),
-            Error::SchemaProviderOperation { source } | Error::Internal { source } => {
+            Error::SystemCatalogTableScan { source, .. } => source.status_code(),
+            Error::SystemCatalogTableScanExec { source, .. } => source.status_code(),
+            Error::InvalidTableInfoInCatalog { source, .. } => source.status_code(),
+
+            Error::CompileScriptInternal { source, .. } | Error::Internal { source, .. } => {
                source.status_code()
            }

-            Error::Unimplemented { .. } => StatusCode::Unsupported,
+            Error::Unimplemented { .. } | Error::NotSupported { .. } => StatusCode::Unsupported,
            Error::QueryAccessDenied { .. } => StatusCode::AccessDenied,
+            Error::Datafusion { .. } => StatusCode::EngineExecuteQuery,
+            Error::TableMetadataManager { source, .. } => source.status_code(),
        }
    }

-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
    fn as_any(&self) -> &dyn Any {
        self
    }
@@ -280,7 +331,7 @@ mod tests {
            StatusCode::TableAlreadyExists,
            Error::TableExists {
                table: "some_table".to_string(),
-                backtrace: Backtrace::generate(),
+                location: Location::generate(),
            }
            .status_code()
        );
@@ -294,7 +345,7 @@ mod tests {
            StatusCode::StorageUnavailable,
            Error::SystemCatalog {
                msg: "".to_string(),
-                backtrace: Backtrace::generate(),
+                location: Location::generate(),
            }
            .status_code()
        );
@@ -303,7 +354,7 @@ mod tests {
            StatusCode::Internal,
            Error::SystemCatalogTypeMismatch {
                data_type: ConcreteDataType::binary_datatype(),
-                backtrace: Backtrace::generate(),
+                location: Location::generate(),
            }
            .status_code()
        );
@@ -317,7 +368,7 @@ mod tests {
    pub fn test_errors_to_datafusion_error() {
        let e: DataFusionError = Error::TableExists {
            table: "test_table".to_string(),
-            backtrace: Backtrace::generate(),
+            location: Location::generate(),
        }
        .into();
        match e {
--- a/src/catalog/src/helper.rs
+++ b/src/catalog/src/helper.rs
@@ -1,379 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::collections::HashMap;
-use std::fmt::{Display, Formatter};
-
-use common_catalog::error::{
-    DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
-};
-use lazy_static::lazy_static;
-use regex::Regex;
-use serde::{Deserialize, Serialize, Serializer};
-use snafu::{ensure, OptionExt, ResultExt};
-use table::metadata::{RawTableInfo, TableId, TableVersion};
-
-pub const CATALOG_KEY_PREFIX: &str = "__c";
-pub const SCHEMA_KEY_PREFIX: &str = "__s";
-pub const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
-pub const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
-
-const ALPHANUMERICS_NAME_PATTERN: &str = "[a-zA-Z_][a-zA-Z0-9_]*";
-
-lazy_static! {
-    static ref CATALOG_KEY_PATTERN: Regex = Regex::new(&format!(
-        "^{CATALOG_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})$"
-    ))
-    .unwrap();
-}
-
-lazy_static! {
-    static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
-        "^{SCHEMA_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})$"
-    ))
-    .unwrap();
-}
-
-lazy_static! {
-    static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
-        "^{TABLE_GLOBAL_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})$"
-    ))
-    .unwrap();
-}
-
-lazy_static! {
-    static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
-        "^{TABLE_REGIONAL_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})-([0-9]+)$"
-    ))
-    .unwrap();
-}
-
-pub fn build_catalog_prefix() -> String {
-    format!("{CATALOG_KEY_PREFIX}-")
-}
-
-pub fn build_schema_prefix(catalog_name: impl AsRef<str>) -> String {
-    format!("{SCHEMA_KEY_PREFIX}-{}-", catalog_name.as_ref())
-}
-
-pub fn build_table_global_prefix(
-    catalog_name: impl AsRef<str>,
-    schema_name: impl AsRef<str>,
-) -> String {
-    format!(
-        "{TABLE_GLOBAL_KEY_PREFIX}-{}-{}-",
-        catalog_name.as_ref(),
-        schema_name.as_ref()
-    )
-}
-
-pub fn build_table_regional_prefix(
-    catalog_name: impl AsRef<str>,
-    schema_name: impl AsRef<str>,
-) -> String {
-    format!(
-        "{}-{}-{}-",
-        TABLE_REGIONAL_KEY_PREFIX,
-        catalog_name.as_ref(),
-        schema_name.as_ref()
-    )
-}
-
-/// Table global info has only one key across all datanodes so it does not have `node_id` field.
-#[derive(Clone)]
-pub struct TableGlobalKey {
-    pub catalog_name: String,
-    pub schema_name: String,
-    pub table_name: String,
-}
-
-impl Display for TableGlobalKey {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        f.write_str(TABLE_GLOBAL_KEY_PREFIX)?;
-        f.write_str("-")?;
-        f.write_str(&self.catalog_name)?;
-        f.write_str("-")?;
-        f.write_str(&self.schema_name)?;
-        f.write_str("-")?;
-        f.write_str(&self.table_name)
-    }
-}
-
-impl TableGlobalKey {
-    pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
-        let key = s.as_ref();
-        let captures = TABLE_GLOBAL_KEY_PATTERN
-            .captures(key)
-            .context(InvalidCatalogSnafu { key })?;
-        ensure!(captures.len() == 4, InvalidCatalogSnafu { key });
-
-        Ok(Self {
-            catalog_name: captures[1].to_string(),
-            schema_name: captures[2].to_string(),
-            table_name: captures[3].to_string(),
-        })
-    }
-}
-
-/// Table global info contains necessary info for a datanode to create table regions, including
-/// table id, table meta(schema...), region id allocation across datanodes.
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub struct TableGlobalValue {
-    /// Id of datanode that created the global table info kv. only for debugging.
-    pub node_id: u64,
-    /// Allocation of region ids across all datanodes.
-    pub regions_id_map: HashMap<u64, Vec<u32>>,
-    pub table_info: RawTableInfo,
-}
-
-impl TableGlobalValue {
-    pub fn table_id(&self) -> TableId {
-        self.table_info.ident.table_id
-    }
-}
-
-/// Table regional info that varies between datanode, so it contains a `node_id` field.
-pub struct TableRegionalKey {
-    pub catalog_name: String,
-    pub schema_name: String,
-    pub table_name: String,
-    pub node_id: u64,
-}
-
-impl Display for TableRegionalKey {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        f.write_str(TABLE_REGIONAL_KEY_PREFIX)?;
-        f.write_str("-")?;
-        f.write_str(&self.catalog_name)?;
-        f.write_str("-")?;
-        f.write_str(&self.schema_name)?;
-        f.write_str("-")?;
-        f.write_str(&self.table_name)?;
-        f.write_str("-")?;
-        f.serialize_u64(self.node_id)
-    }
-}
-
-impl TableRegionalKey {
-    pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
-        let key = s.as_ref();
-        let captures = TABLE_REGIONAL_KEY_PATTERN
-            .captures(key)
-            .context(InvalidCatalogSnafu { key })?;
-        ensure!(captures.len() == 5, InvalidCatalogSnafu { key });
-        let node_id = captures[4]
-            .to_string()
-            .parse()
-            .map_err(|_| InvalidCatalogSnafu { key }.build())?;
-        Ok(Self {
-            catalog_name: captures[1].to_string(),
-            schema_name: captures[2].to_string(),
-            table_name: captures[3].to_string(),
-            node_id,
-        })
-    }
-}
-
-/// Regional table info of specific datanode, including table version on that datanode and
-/// region ids allocated by metasrv.
-#[derive(Debug, Serialize, Deserialize, Clone)]
-pub struct TableRegionalValue {
-    pub version: TableVersion,
-    pub regions_ids: Vec<u32>,
-}
-
-pub struct CatalogKey {
-    pub catalog_name: String,
-}
-
-impl Display for CatalogKey {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        f.write_str(CATALOG_KEY_PREFIX)?;
-        f.write_str("-")?;
-        f.write_str(&self.catalog_name)
-    }
-}
-
-impl CatalogKey {
-    pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
-        let key = s.as_ref();
-        let captures = CATALOG_KEY_PATTERN
-            .captures(key)
-            .context(InvalidCatalogSnafu { key })?;
-        ensure!(captures.len() == 2, InvalidCatalogSnafu { key });
-        Ok(Self {
-            catalog_name: captures[1].to_string(),
-        })
-    }
-}
-
-#[derive(Debug, Serialize, Deserialize)]
-pub struct CatalogValue;
-
-pub struct SchemaKey {
-    pub catalog_name: String,
-    pub schema_name: String,
-}
-
-impl Display for SchemaKey {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        f.write_str(SCHEMA_KEY_PREFIX)?;
-        f.write_str("-")?;
-        f.write_str(&self.catalog_name)?;
-        f.write_str("-")?;
-        f.write_str(&self.schema_name)
-    }
-}
-
-impl SchemaKey {
-    pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
-        let key = s.as_ref();
-        let captures = SCHEMA_KEY_PATTERN
-            .captures(key)
-            .context(InvalidCatalogSnafu { key })?;
-        ensure!(captures.len() == 3, InvalidCatalogSnafu { key });
-        Ok(Self {
-            catalog_name: captures[1].to_string(),
-            schema_name: captures[2].to_string(),
-        })
-    }
-}
-
-#[derive(Debug, Serialize, Deserialize)]
-pub struct SchemaValue;
-
-macro_rules! define_catalog_value {
-    ( $($val_ty: ty), *) => {
-            $(
-                impl $val_ty {
-                    pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
-                        serde_json::from_str(s.as_ref())
-                            .context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
-                    }
-
-                    pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Result<Self, Error> {
-                         Self::parse(&String::from_utf8_lossy(bytes.as_ref()))
-                    }
-
-                    pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
-                        Ok(serde_json::to_string(self)
-                            .context(SerializeCatalogEntryValueSnafu)?
-                            .into_bytes())
-                    }
-                }
-            )*
-        }
-}
-
-define_catalog_value!(
-    TableRegionalValue,
-    TableGlobalValue,
-    CatalogValue,
-    SchemaValue
-);
-
-#[cfg(test)]
-mod tests {
-    use datatypes::prelude::ConcreteDataType;
-    use datatypes::schema::{ColumnSchema, RawSchema, Schema};
-    use table::metadata::{RawTableMeta, TableIdent, TableType};
-
-    use super::*;
-
-    #[test]
-    fn test_parse_catalog_key() {
-        let key = "__c-C";
-        let catalog_key = CatalogKey::parse(key).unwrap();
-        assert_eq!("C", catalog_key.catalog_name);
-        assert_eq!(key, catalog_key.to_string());
-    }
-
-    #[test]
-    fn test_parse_schema_key() {
-        let key = "__s-C-S";
-        let schema_key = SchemaKey::parse(key).unwrap();
-        assert_eq!("C", schema_key.catalog_name);
-        assert_eq!("S", schema_key.schema_name);
-        assert_eq!(key, schema_key.to_string());
-    }
-
-    #[test]
-    fn test_parse_table_key() {
-        let key = "__tg-C-S-T";
-        let entry = TableGlobalKey::parse(key).unwrap();
-        assert_eq!("C", entry.catalog_name);
-        assert_eq!("S", entry.schema_name);
-        assert_eq!("T", entry.table_name);
-        assert_eq!(key, &entry.to_string());
-    }
-
-    #[test]
-    fn test_build_prefix() {
-        assert_eq!("__c-", build_catalog_prefix());
-        assert_eq!("__s-CATALOG-", build_schema_prefix("CATALOG"));
-        assert_eq!(
-            "__tg-CATALOG-SCHEMA-",
-            build_table_global_prefix("CATALOG", "SCHEMA")
-        );
-    }
-
-    #[test]
-    fn test_serialize_schema() {
-        let schema = Schema::new(vec![ColumnSchema::new(
-            "name",
-            ConcreteDataType::string_datatype(),
-            true,
-        )]);
-
-        let meta = RawTableMeta {
-            schema: RawSchema::from(&schema),
-            engine: "mito".to_string(),
-            created_on: chrono::DateTime::default(),
-            primary_key_indices: vec![0, 1],
-            next_column_id: 3,
-            engine_options: Default::default(),
-            value_indices: vec![2, 3],
-            options: Default::default(),
-            region_numbers: vec![1],
-        };
-
-        let table_info = RawTableInfo {
-            ident: TableIdent {
-                table_id: 42,
-                version: 1,
-            },
-            name: "table_1".to_string(),
-            desc: Some("blah".to_string()),
-            catalog_name: "catalog_1".to_string(),
-            schema_name: "schema_1".to_string(),
-            meta,
-            table_type: TableType::Base,
-        };
-
-        let value = TableGlobalValue {
-            node_id: 0,
-            regions_id_map: HashMap::from([(0, vec![1, 2, 3])]),
-            table_info,
-        };
-        let serialized = serde_json::to_string(&value).unwrap();
-        let deserialized = TableGlobalValue::parse(serialized).unwrap();
-        assert_eq!(value, deserialized);
-    }
-
-    #[test]
-    fn test_table_global_value_compatibility() {
-        let s = r#"{"node_id":1,"regions_id_map":{"1":[0]},"table_info":{"ident":{"table_id":1098,"version":1},"name":"container_cpu_limit","desc":"Created on insertion","catalog_name":"greptime","schema_name":"dd","meta":{"schema":{"column_schemas":[{"name":"container_id","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"container_name","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"docker_image","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"host","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"image_name","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"image_tag","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"interval","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"runtime","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"short_image","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"type","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"dd_value","data_type":{"Float64":{}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"ts","data_type":{"Timestamp":{"Millisecond":null}},"is_nullable":false,"is_time_index":true,"default_constraint":null,"metadata":{"greptime:time_index":"true"}},{"name":"git.repository_url","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}}],"timestam
p_index":11,"version":1},"primary_key_indices":[0,1,2,3,4,5,6,7,8,9,12],"value_indices":[10,11],"engine":"mito","next_column_id":12,"region_numbers":[],"engine_options":{},"options":{},"created_on":"1970-01-01T00:00:00Z"},"table_type":"Base"}}"#;
-        TableGlobalValue::parse(s).unwrap();
-    }
-}
--- a/src/catalog/src/information_schema.rs
+++ b/src/catalog/src/information_schema.rs
@@ -0,0 +1,142 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod columns;
+mod tables;
+
+use std::any::Any;
+use std::sync::{Arc, Weak};
+
+use async_trait::async_trait;
+use common_error::ext::BoxedError;
+use common_recordbatch::{RecordBatchStreamAdaptor, SendableRecordBatchStream};
+use datatypes::schema::SchemaRef;
+use futures_util::StreamExt;
+use snafu::ResultExt;
+use store_api::storage::ScanRequest;
+use table::error::{SchemaConversionSnafu, TablesRecordBatchSnafu};
+use table::metadata::TableType;
+use table::{Result as TableResult, Table, TableRef};
+
+use self::columns::InformationSchemaColumns;
+use crate::error::Result;
+use crate::information_schema::tables::InformationSchemaTables;
+use crate::CatalogManager;
+
+const TABLES: &str = "tables";
+const COLUMNS: &str = "columns";
+
+pub struct InformationSchemaProvider {
+    catalog_name: String,
+    catalog_manager: Weak<dyn CatalogManager>,
+}
+
+impl InformationSchemaProvider {
+    pub fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
+        Self {
+            catalog_name,
+            catalog_manager,
+        }
+    }
+}
+
+impl InformationSchemaProvider {
+    pub fn table(&self, name: &str) -> Result<Option<TableRef>> {
+        let stream_builder = match name.to_ascii_lowercase().as_ref() {
+            TABLES => Arc::new(InformationSchemaTables::new(
+                self.catalog_name.clone(),
+                self.catalog_manager.clone(),
+            )) as _,
+            COLUMNS => Arc::new(InformationSchemaColumns::new(
+                self.catalog_name.clone(),
+                self.catalog_manager.clone(),
+            )) as _,
+            _ => {
+                return Ok(None);
+            }
+        };
+
+        Ok(Some(Arc::new(InformationTable::new(stream_builder))))
+    }
+}
+
+// TODO(ruihang): make it a more generic trait:
+// https://github.com/GreptimeTeam/greptimedb/pull/1639#discussion_r1205001903
+pub trait InformationStreamBuilder: Send + Sync {
+    fn to_stream(&self) -> Result<SendableRecordBatchStream>;
+
+    fn schema(&self) -> SchemaRef;
+}
+
+pub struct InformationTable {
+    stream_builder: Arc<dyn InformationStreamBuilder>,
+}
+
+impl InformationTable {
+    pub fn new(stream_builder: Arc<dyn InformationStreamBuilder>) -> Self {
+        Self { stream_builder }
+    }
+}
+
+#[async_trait]
+impl Table for InformationTable {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.stream_builder.schema()
+    }
+
+    fn table_info(&self) -> table::metadata::TableInfoRef {
+        unreachable!("Should not call table_info() of InformationTable directly")
+    }
+
+    fn table_type(&self) -> table::metadata::TableType {
+        TableType::View
+    }
+
+    async fn scan_to_stream(&self, request: ScanRequest) -> TableResult<SendableRecordBatchStream> {
+        let projection = request.projection;
+        let projected_schema = if let Some(projection) = &projection {
+            Arc::new(
+                self.schema()
+                    .try_project(projection)
+                    .context(SchemaConversionSnafu)?,
+            )
+        } else {
+            self.schema()
+        };
+        let stream = self
+            .stream_builder
+            .to_stream()
+            .map_err(BoxedError::new)
+            .context(TablesRecordBatchSnafu)?
+            .map(move |batch| {
+                batch.and_then(|batch| {
+                    if let Some(projection) = &projection {
+                        batch.try_project(projection)
+                    } else {
+                        Ok(batch)
+                    }
+                })
+            });
+        let stream = RecordBatchStreamAdaptor {
+            schema: projected_schema,
+            stream: Box::pin(stream),
+            output_ordering: None,
+        };
+        Ok(Box::pin(stream))
+    }
+}
--- a/src/catalog/src/information_schema/columns.rs
+++ b/src/catalog/src/information_schema/columns.rs
@@ -0,0 +1,231 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::{Arc, Weak};
+
+use arrow_schema::SchemaRef as ArrowSchemaRef;
+use common_catalog::consts::{
+    SEMANTIC_TYPE_FIELD, SEMANTIC_TYPE_PRIMARY_KEY, SEMANTIC_TYPE_TIME_INDEX,
+};
+use common_error::ext::BoxedError;
+use common_query::physical_plan::TaskContext;
+use common_recordbatch::adapter::RecordBatchStreamAdapter;
+use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
+use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
+use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
+use datatypes::prelude::{ConcreteDataType, DataType};
+use datatypes::scalars::ScalarVectorBuilder;
+use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
+use datatypes::vectors::{StringVectorBuilder, VectorRef};
+use snafu::{OptionExt, ResultExt};
+
+use super::InformationStreamBuilder;
+use crate::error::{
+    CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
+};
+use crate::CatalogManager;
+
+pub(super) struct InformationSchemaColumns {
+    schema: SchemaRef,
+    catalog_name: String,
+    catalog_manager: Weak<dyn CatalogManager>,
+}
+
+const TABLE_CATALOG: &str = "table_catalog";
+const TABLE_SCHEMA: &str = "table_schema";
+const TABLE_NAME: &str = "table_name";
+const COLUMN_NAME: &str = "column_name";
+const DATA_TYPE: &str = "data_type";
+const SEMANTIC_TYPE: &str = "semantic_type";
+
+impl InformationSchemaColumns {
+    pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
+        let schema = Arc::new(Schema::new(vec![
+            ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(DATA_TYPE, ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new(SEMANTIC_TYPE, ConcreteDataType::string_datatype(), false),
+        ]));
+        Self {
+            schema,
+            catalog_name,
+            catalog_manager,
+        }
+    }
+
+    fn builder(&self) -> InformationSchemaColumnsBuilder {
+        InformationSchemaColumnsBuilder::new(
+            self.schema.clone(),
+            self.catalog_name.clone(),
+            self.catalog_manager.clone(),
+        )
+    }
+}
+
+impl InformationStreamBuilder for InformationSchemaColumns {
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    fn to_stream(&self) -> Result<SendableRecordBatchStream> {
+        let schema = self.schema.arrow_schema().clone();
+        let mut builder = self.builder();
+        let stream = Box::pin(DfRecordBatchStreamAdapter::new(
+            schema,
+            futures::stream::once(async move {
+                builder
+                    .make_tables()
+                    .await
+                    .map(|x| x.into_df_record_batch())
+                    .map_err(Into::into)
+            }),
+        ));
+        Ok(Box::pin(
+            RecordBatchStreamAdapter::try_new(stream)
+                .map_err(BoxedError::new)
+                .context(InternalSnafu)?,
+        ))
+    }
+}
+
+struct InformationSchemaColumnsBuilder {
+    schema: SchemaRef,
+    catalog_name: String,
+    catalog_manager: Weak<dyn CatalogManager>,
+
+    catalog_names: StringVectorBuilder,
+    schema_names: StringVectorBuilder,
+    table_names: StringVectorBuilder,
+    column_names: StringVectorBuilder,
+    data_types: StringVectorBuilder,
+    semantic_types: StringVectorBuilder,
+}
+
+impl InformationSchemaColumnsBuilder {
+    fn new(
+        schema: SchemaRef,
+        catalog_name: String,
+        catalog_manager: Weak<dyn CatalogManager>,
+    ) -> Self {
+        Self {
+            schema,
+            catalog_name,
+            catalog_manager,
+            catalog_names: StringVectorBuilder::with_capacity(42),
+            schema_names: StringVectorBuilder::with_capacity(42),
+            table_names: StringVectorBuilder::with_capacity(42),
+            column_names: StringVectorBuilder::with_capacity(42),
+            data_types: StringVectorBuilder::with_capacity(42),
+            semantic_types: StringVectorBuilder::with_capacity(42),
+        }
+    }
+
+    /// Construct the `information_schema.columns` virtual table
+    async fn make_tables(&mut self) -> Result<RecordBatch> {
+        let catalog_name = self.catalog_name.clone();
+        let catalog_manager = self
+            .catalog_manager
+            .upgrade()
+            .context(UpgradeWeakCatalogManagerRefSnafu)?;
+
+        for schema_name in catalog_manager.schema_names(&catalog_name).await? {
+            if !catalog_manager
+                .schema_exist(&catalog_name, &schema_name)
+                .await?
+            {
+                continue;
+            }
+            for table_name in catalog_manager
+                .table_names(&catalog_name, &schema_name)
+                .await?
+            {
+                let Some(table) = catalog_manager.table(&catalog_name, &schema_name, &table_name).await? else { continue };
+                let keys = &table.table_info().meta.primary_key_indices;
+                let schema = table.schema();
+                for (idx, column) in schema.column_schemas().iter().enumerate() {
+                    let semantic_type = if column.is_time_index() {
+                        SEMANTIC_TYPE_TIME_INDEX
+                    } else if keys.contains(&idx) {
+                        SEMANTIC_TYPE_PRIMARY_KEY
+                    } else {
+                        SEMANTIC_TYPE_FIELD
+                    };
+                    self.add_column(
+                        &catalog_name,
+                        &schema_name,
+                        &table_name,
+                        &column.name,
+                        column.data_type.name(),
+                        semantic_type,
+                    );
+                }
+            }
+        }
+
+        self.finish()
+    }
+
+    fn add_column(
+        &mut self,
+        catalog_name: &str,
+        schema_name: &str,
+        table_name: &str,
+        column_name: &str,
+        data_type: &str,
+        semantic_type: &str,
+    ) {
+        self.catalog_names.push(Some(catalog_name));
+        self.schema_names.push(Some(schema_name));
+        self.table_names.push(Some(table_name));
+        self.column_names.push(Some(column_name));
+        self.data_types.push(Some(data_type));
+        self.semantic_types.push(Some(semantic_type));
+    }
+
+    fn finish(&mut self) -> Result<RecordBatch> {
+        let columns: Vec<VectorRef> = vec![
+            Arc::new(self.catalog_names.finish()),
+            Arc::new(self.schema_names.finish()),
+            Arc::new(self.table_names.finish()),
+            Arc::new(self.column_names.finish()),
+            Arc::new(self.data_types.finish()),
+            Arc::new(self.semantic_types.finish()),
+        ];
+        RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
+    }
+}
+
+impl DfPartitionStream for InformationSchemaColumns {
+    fn schema(&self) -> &ArrowSchemaRef {
+        self.schema.arrow_schema()
+    }
+
+    fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
+        let schema = self.schema.arrow_schema().clone();
+        let mut builder = self.builder();
+        Box::pin(DfRecordBatchStreamAdapter::new(
+            schema,
+            futures::stream::once(async move {
+                builder
+                    .make_tables()
+                    .await
+                    .map(|x| x.into_df_record_batch())
+                    .map_err(Into::into)
+            }),
+        ))
+    }
+}
--- a/src/catalog/src/information_schema/tables.rs
+++ b/src/catalog/src/information_schema/tables.rs
@@ -0,0 +1,223 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::{Arc, Weak};
+
+use arrow_schema::SchemaRef as ArrowSchemaRef;
+use common_catalog::consts::INFORMATION_SCHEMA_NAME;
+use common_error::ext::BoxedError;
+use common_query::physical_plan::TaskContext;
+use common_recordbatch::adapter::RecordBatchStreamAdapter;
+use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
+use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
+use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
+use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
+use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
+use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder};
+use snafu::{OptionExt, ResultExt};
+use table::metadata::TableType;
+
+use crate::error::{
+    CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
+};
+use crate::information_schema::InformationStreamBuilder;
+use crate::CatalogManager;
+
+pub(super) struct InformationSchemaTables {
+    schema: SchemaRef,
+    catalog_name: String,
+    catalog_manager: Weak<dyn CatalogManager>,
+}
+
+impl InformationSchemaTables {
+    pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
+        let schema = Arc::new(Schema::new(vec![
+            ColumnSchema::new("table_catalog", ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new("table_schema", ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new("table_name", ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new("table_type", ConcreteDataType::string_datatype(), false),
+            ColumnSchema::new("table_id", ConcreteDataType::uint32_datatype(), true),
+            ColumnSchema::new("engine", ConcreteDataType::string_datatype(), true),
+        ]));
+        Self {
+            schema,
+            catalog_name,
+            catalog_manager,
+        }
+    }
+
+    fn builder(&self) -> InformationSchemaTablesBuilder {
+        InformationSchemaTablesBuilder::new(
+            self.schema.clone(),
+            self.catalog_name.clone(),
+            self.catalog_manager.clone(),
+        )
+    }
+}
+
+impl InformationStreamBuilder for InformationSchemaTables {
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    fn to_stream(&self) -> Result<SendableRecordBatchStream> {
+        let schema = self.schema.arrow_schema().clone();
+        let mut builder = self.builder();
+        let stream = Box::pin(DfRecordBatchStreamAdapter::new(
+            schema,
+            futures::stream::once(async move {
+                builder
+                    .make_tables()
+                    .await
+                    .map(|x| x.into_df_record_batch())
+                    .map_err(Into::into)
+            }),
+        ));
+        Ok(Box::pin(
+            RecordBatchStreamAdapter::try_new(stream)
+                .map_err(BoxedError::new)
+                .context(InternalSnafu)?,
+        ))
+    }
+}
+
+/// Builds the `information_schema.tables` table row by row
+///
+/// Columns are based on <https://www.postgresql.org/docs/current/infoschema-tables.html>
+struct InformationSchemaTablesBuilder {
+    schema: SchemaRef,
+    catalog_name: String,
+    catalog_manager: Weak<dyn CatalogManager>,
+
+    catalog_names: StringVectorBuilder,
+    schema_names: StringVectorBuilder,
+    table_names: StringVectorBuilder,
+    table_types: StringVectorBuilder,
+    table_ids: UInt32VectorBuilder,
+    engines: StringVectorBuilder,
+}
+
+impl InformationSchemaTablesBuilder {
+    fn new(
+        schema: SchemaRef,
+        catalog_name: String,
+        catalog_manager: Weak<dyn CatalogManager>,
+    ) -> Self {
+        Self {
+            schema,
+            catalog_name,
+            catalog_manager,
+            catalog_names: StringVectorBuilder::with_capacity(42),
+            schema_names: StringVectorBuilder::with_capacity(42),
+            table_names: StringVectorBuilder::with_capacity(42),
+            table_types: StringVectorBuilder::with_capacity(42),
+            table_ids: UInt32VectorBuilder::with_capacity(42),
+            engines: StringVectorBuilder::with_capacity(42),
+        }
+    }
+
+    /// Construct the `information_schema.tables` virtual table for `self.catalog_name` as a single [`RecordBatch`]
+    async fn make_tables(&mut self) -> Result<RecordBatch> {
+        let catalog_name = self.catalog_name.clone();
+        let catalog_manager = self
+            .catalog_manager
+            .upgrade()
+            .context(UpgradeWeakCatalogManagerRefSnafu)?; // errors if the catalog manager has already been dropped
+
+        for schema_name in catalog_manager.schema_names(&catalog_name).await? {
+            if schema_name == INFORMATION_SCHEMA_NAME {
+                continue; // tables under the information schema itself are not listed
+            }
+            if !catalog_manager
+                .schema_exist(&catalog_name, &schema_name)
+                .await?
+            {
+                continue; // schema name was listed but `schema_exist` reports it missing; skip it
+            }
+
+            for table_name in catalog_manager
+                .table_names(&catalog_name, &schema_name)
+                .await?
+            {
+                let Some(table) = catalog_manager.table(&catalog_name, &schema_name, &table_name).await? else { continue };
+                let table_info = table.table_info();
+                self.add_table(
+                    &catalog_name,
+                    &schema_name,
+                    &table_name,
+                    table.table_type(),
+                    Some(table_info.ident.table_id),
+                    Some(&table_info.meta.engine),
+                );
+            }
+        }
+
+        self.finish()
+    }
+
+    fn add_table(
+        &mut self,
+        catalog_name: &str,
+        schema_name: &str,
+        table_name: &str,
+        table_type: TableType,
+        table_id: Option<u32>,
+        engine: Option<&str>,
+    ) {
+        self.catalog_names.push(Some(catalog_name));
+        self.schema_names.push(Some(schema_name));
+        self.table_names.push(Some(table_name));
+        self.table_types.push(Some(match table_type {
+            TableType::Base => "BASE TABLE",
+            TableType::View => "VIEW",
+            TableType::Temporary => "LOCAL TEMPORARY",
+        }));
+        self.table_ids.push(table_id);
+        self.engines.push(engine);
+    }
+
+    fn finish(&mut self) -> Result<RecordBatch> {
+        let columns: Vec<VectorRef> = vec![
+            Arc::new(self.catalog_names.finish()),
+            Arc::new(self.schema_names.finish()),
+            Arc::new(self.table_names.finish()),
+            Arc::new(self.table_types.finish()),
+            Arc::new(self.table_ids.finish()),
+            Arc::new(self.engines.finish()),
+        ];
+        RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
+    }
+}
+
+impl DfPartitionStream for InformationSchemaTables {
+    fn schema(&self) -> &ArrowSchemaRef {
+        self.schema.arrow_schema()
+    }
+
+    fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
+        let schema = self.schema.arrow_schema().clone();
+        let mut builder = self.builder();
+        Box::pin(DfRecordBatchStreamAdapter::new(
+            schema,
+            futures::stream::once(async move {
+                builder
+                    .make_tables()
+                    .await
+                    .map(|x| x.into_df_record_batch())
+                    .map_err(Into::into)
+            }),
+        ))
+    }
+}
--- a/src/catalog/src/lib.rs
+++ b/src/catalog/src/lib.rs
@@ -12,92 +12,66 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#![feature(trait_upcasting)]
 #![feature(assert_matches)]
+#![feature(try_blocks)]

 use std::any::Any;
+use std::collections::HashMap;
 use std::fmt::{Debug, Formatter};
 use std::sync::Arc;

-use common_telemetry::info;
-use snafu::{OptionExt, ResultExt};
+use api::v1::meta::{RegionStat, TableIdent, TableName};
+use common_telemetry::{info, warn};
+use snafu::ResultExt;
 use table::engine::{EngineContext, TableEngineRef};
 use table::metadata::TableId;
 use table::requests::CreateTableRequest;
 use table::TableRef;

 use crate::error::{CreateTableSnafu, Result};
-pub use crate::schema::{SchemaProvider, SchemaProviderRef};

 pub mod error;
-pub mod helper;
+pub mod information_schema;
 pub mod local;
+mod metrics;
 pub mod remote;
-pub mod schema;
 pub mod system;
 pub mod table_source;
 pub mod tables;

-/// Represent a list of named catalogs
-pub trait CatalogList: Sync + Send {
-    /// Returns the catalog list as [`Any`](std::any::Any)
-    /// so that it can be downcast to a specific implementation.
-    fn as_any(&self) -> &dyn Any;
-
-    /// Adds a new catalog to this catalog list
-    /// If a catalog of the same name existed before, it is replaced in the list and returned.
-    fn register_catalog(
-        &self,
-        name: String,
-        catalog: CatalogProviderRef,
-    ) -> Result<Option<CatalogProviderRef>>;
-
-    /// Retrieves the list of available catalog names
-    fn catalog_names(&self) -> Result<Vec<String>>;
-
-    /// Retrieves a specific catalog by name, provided it exists.
-    fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>>;
-}
-
-/// Represents a catalog, comprising a number of named schemas.
-pub trait CatalogProvider: Sync + Send {
-    /// Returns the catalog provider as [`Any`](std::any::Any)
-    /// so that it can be downcast to a specific implementation.
-    fn as_any(&self) -> &dyn Any;
-
-    /// Retrieves the list of available schema names in this catalog.
-    fn schema_names(&self) -> Result<Vec<String>>;
-
-    /// Registers schema to this catalog.
-    fn register_schema(
-        &self,
-        name: String,
-        schema: SchemaProviderRef,
-    ) -> Result<Option<SchemaProviderRef>>;
-
-    /// Retrieves a specific schema from the catalog by name, provided it exists.
-    fn schema(&self, name: &str) -> Result<Option<SchemaProviderRef>>;
-}
-
-pub type CatalogListRef = Arc<dyn CatalogList>;
-pub type CatalogProviderRef = Arc<dyn CatalogProvider>;
-
 #[async_trait::async_trait]
-pub trait CatalogManager: CatalogList {
+pub trait CatalogManager: Send + Sync {
+    fn as_any(&self) -> &dyn Any;
+
    /// Starts a catalog manager.
    async fn start(&self) -> Result<()>;

-    /// Registers a table within given catalog/schema to catalog manager,
-    /// returns whether the table registered.
-    async fn register_table(&self, request: RegisterTableRequest) -> Result<bool>;
-
-    /// Deregisters a table within given catalog/schema to catalog manager,
-    /// returns whether the table deregistered.
-    async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool>;
+    /// Registers a catalog to catalog manager, returns whether the catalog exist before.
+    async fn register_catalog(&self, name: String) -> Result<bool>;

    /// Register a schema with catalog name and schema name. Retuens whether the
    /// schema registered.
+    ///
+    /// # Errors
+    ///
+    /// This method will/should fail if the catalog does not exist
    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool>;

+    /// Deregisters a schema (database) within the given catalog from the catalog manager
+    async fn deregister_schema(&self, request: DeregisterSchemaRequest) -> Result<bool>;
+
+    /// Registers a table within given catalog/schema to catalog manager,
+    /// returns whether the table registered.
+    ///
+    /// # Errors
+    ///
+    /// This method will/should fail if the catalog or schema does not exist
+    async fn register_table(&self, request: RegisterTableRequest) -> Result<bool>;
+
+    /// Deregisters a table within the given catalog/schema from the catalog manager
+    async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<()>;
+
    /// Rename a table to [RenameTableRequest::new_table_name], returns whether the table is renamed.
    async fn rename_table(&self, request: RenameTableRequest) -> Result<bool>;

@@ -105,7 +79,17 @@ pub trait CatalogManager: CatalogList {
    async fn register_system_table(&self, request: RegisterSystemTableRequest)
        -> error::Result<()>;

-    fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>>;
+    async fn catalog_names(&self) -> Result<Vec<String>>;
+
+    async fn schema_names(&self, catalog: &str) -> Result<Vec<String>>;
+
+    async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>>;
+
+    async fn catalog_exist(&self, catalog: &str) -> Result<bool>;
+
+    async fn schema_exist(&self, catalog: &str, schema: &str) -> Result<bool>;
+
+    async fn table_exist(&self, catalog: &str, schema: &str, table: &str) -> Result<bool>;

    /// Returns the table by catalog, schema and table name.
    async fn table(
@@ -168,17 +152,15 @@ pub struct DeregisterTableRequest {
 }

 #[derive(Debug, Clone)]
-pub struct RegisterSchemaRequest {
+pub struct DeregisterSchemaRequest {
    pub catalog: String,
    pub schema: String,
 }

-pub trait CatalogProviderFactory {
-    fn create(&self, catalog_name: String) -> CatalogProviderRef;
-}
-
-pub trait SchemaProviderFactory {
-    fn create(&self, catalog_name: String, schema_name: String) -> SchemaProviderRef;
+#[derive(Debug, Clone)]
+pub struct RegisterSchemaRequest {
+    pub catalog: String,
+    pub schema: String,
 }

 pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
@@ -206,7 +188,7 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
                        table_name,
                    ),
                })?;
-            manager
+            let _ = manager
                .register_table(RegisterTableRequest {
                    catalog: catalog_name.clone(),
                    schema: schema_name.clone(),
@@ -225,39 +207,57 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
    Ok(())
 }

-/// The number of regions in the datanode node.
-pub async fn region_number(catalog_manager: &CatalogManagerRef) -> Result<u64> {
+/// Collects the stats of the regions in the datanode.
+/// Returns the total number of regions together with the collected region stats.
+///
+/// Ignores any errors that occur while iterating regions. The intention of this method is to
+/// collect region stats that will be carried in Datanode's heartbeat to Metasrv, so it's a
+/// "try our best" job.
+pub async fn datanode_stat(catalog_manager: &CatalogManagerRef) -> (u64, Vec<RegionStat>) {
    let mut region_number: u64 = 0;
+    let mut region_stats = Vec::new();

-    for catalog_name in catalog_manager.catalog_names()? {
-        let catalog =
-            catalog_manager
-                .catalog(&catalog_name)?
-                .context(error::CatalogNotFoundSnafu {
-                    catalog_name: &catalog_name,
-                })?;
+    let Ok(catalog_names) = catalog_manager.catalog_names().await else { return (region_number, region_stats) };
+    for catalog_name in catalog_names {
+        let Ok(schema_names) = catalog_manager.schema_names(&catalog_name).await else { continue };
+        for schema_name in schema_names {
+            let Ok(table_names) = catalog_manager.table_names(&catalog_name,&schema_name).await else { continue };
+            for table_name in table_names {
+                let Ok(Some(table)) = catalog_manager.table(&catalog_name, &schema_name, &table_name).await else { continue };

-        for schema_name in catalog.schema_names()? {
-            let schema = catalog
-                .schema(&schema_name)?
-                .context(error::SchemaNotFoundSnafu {
-                    catalog: &catalog_name,
-                    schema: &schema_name,
-                })?;
-
-            for table_name in schema.table_names()? {
-                let table =
-                    schema
-                        .table(&table_name)
-                        .await?
-                        .context(error::TableNotFoundSnafu {
-                            table_info: &table_name,
-                        })?;
-
-                let region_numbers = &table.table_info().meta.region_numbers;
+                let table_info = table.table_info();
+                let region_numbers = &table_info.meta.region_numbers;
                region_number += region_numbers.len() as u64;
+
+                let engine = &table_info.meta.engine;
+                let table_id = table_info.ident.table_id;
+
+                match table.region_stats() {
+                    Ok(stats) => {
+                        let stats = stats.into_iter().map(|stat| RegionStat {
+                            region_id: stat.region_id,
+                            table_ident: Some(TableIdent {
+                                table_id,
+                                table_name: Some(TableName {
+                                    catalog_name: catalog_name.clone(),
+                                    schema_name: schema_name.clone(),
+                                    table_name: table_name.clone(),
+                                }),
+                                engine: engine.clone(),
+                            }),
+                            approximate_bytes: stat.disk_usage_bytes as i64,
+                            attrs: HashMap::from([("engine_name".to_owned(), engine.clone())]),
+                            ..Default::default()
+                        });
+
+                        region_stats.extend(stats);
+                    }
+                    Err(e) => {
+                        warn!("Failed to get region status, err: {:?}", e);
+                    }
+                };
            }
        }
    }
-    Ok(region_number)
+    (region_number, region_stats)
 }
--- a/src/catalog/src/local.rs
+++ b/src/catalog/src/local.rs
@@ -16,6 +16,4 @@ pub mod manager;
 pub mod memory;

 pub use manager::LocalCatalogManager;
-pub use memory::{
-    new_memory_catalog_list, MemoryCatalogManager, MemoryCatalogProvider, MemorySchemaProvider,
-};
+pub use memory::{new_memory_catalog_manager, MemoryCatalogManager};
--- a/src/catalog/src/local/manager.rs
+++ b/src/catalog/src/local/manager.rs
@@ -18,7 +18,8 @@ use std::sync::Arc;

 use common_catalog::consts::{
    DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID,
-    SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
+    MITO_ENGINE, NUMBERS_TABLE_ID, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID,
+    SYSTEM_CATALOG_TABLE_NAME,
 };
 use common_catalog::format_full_table_name;
 use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
@@ -26,36 +27,40 @@ use common_telemetry::{error, info};
 use datatypes::prelude::ScalarVector;
 use datatypes::vectors::{BinaryVector, UInt8Vector};
 use futures_util::lock::Mutex;
+use metrics::increment_gauge;
 use snafu::{ensure, OptionExt, ResultExt};
-use table::engine::{EngineContext, TableEngineRef};
+use table::engine::manager::TableEngineManagerRef;
+use table::engine::EngineContext;
 use table::metadata::TableId;
 use table::requests::OpenTableRequest;
-use table::table::numbers::NumbersTable;
+use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
 use table::table::TableIdProvider;
 use table::TableRef;

 use crate::error::{
    self, CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu,
    Result, SchemaExistsSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu,
-    SystemCatalogTypeMismatchSnafu, TableExistsSnafu, TableNotFoundSnafu,
+    SystemCatalogTypeMismatchSnafu, TableEngineNotFoundSnafu, TableExistsSnafu, TableNotExistSnafu,
+    TableNotFoundSnafu, UnimplementedSnafu,
 };
-use crate::local::memory::{MemoryCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
+use crate::information_schema::InformationSchemaProvider;
+use crate::local::memory::MemoryCatalogManager;
 use crate::system::{
    decode_system_catalog, Entry, SystemCatalogTable, TableEntry, ENTRY_TYPE_INDEX, KEY_INDEX,
    VALUE_INDEX,
 };
 use crate::tables::SystemCatalog;
 use crate::{
-    handle_system_table_request, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
+    handle_system_table_request, CatalogManager, CatalogManagerRef, DeregisterSchemaRequest,
    DeregisterTableRequest, RegisterSchemaRequest, RegisterSystemTableRequest,
-    RegisterTableRequest, RenameTableRequest, SchemaProvider, SchemaProviderRef,
+    RegisterTableRequest, RenameTableRequest,
 };

 /// A `CatalogManager` consists of a system catalog and a bunch of user catalogs.
 pub struct LocalCatalogManager {
    system: Arc<SystemCatalog>,
    catalogs: Arc<MemoryCatalogManager>,
-    engine: TableEngineRef,
+    engine_manager: TableEngineManagerRef,
    next_table_id: AtomicU32,
    init_lock: Mutex<bool>,
    register_lock: Mutex<()>,
@@ -63,19 +68,20 @@ pub struct LocalCatalogManager {
 }

 impl LocalCatalogManager {
-    /// Create a new [CatalogManager] with given user catalogs and table engine
-    pub async fn try_new(engine: TableEngineRef) -> Result<Self> {
+    /// Create a new [CatalogManager] backed by the mito engine from the given engine manager
+    pub async fn try_new(engine_manager: TableEngineManagerRef) -> Result<Self> {
+        let engine = engine_manager
+            .engine(MITO_ENGINE)
+            .context(TableEngineNotFoundSnafu {
+                engine_name: MITO_ENGINE,
+            })?;
        let table = SystemCatalogTable::new(engine.clone()).await?;
-        let memory_catalog_list = crate::local::memory::new_memory_catalog_list()?;
-        let system_catalog = Arc::new(SystemCatalog::new(
-            table,
-            memory_catalog_list.clone(),
-            engine.clone(),
-        ));
+        let memory_catalog_manager = crate::local::memory::new_memory_catalog_manager()?;
+        let system_catalog = Arc::new(SystemCatalog::new(table));
        Ok(Self {
            system: system_catalog,
-            catalogs: memory_catalog_list,
-            engine,
+            catalogs: memory_catalog_manager,
+            engine_manager,
            next_table_id: AtomicU32::new(MIN_USER_TABLE_ID),
            init_lock: Mutex::new(false),
            register_lock: Mutex::new(()),
@@ -85,7 +91,7 @@ impl LocalCatalogManager {

    /// Scan all entries from system catalog table
    pub async fn init(&self) -> Result<()> {
-        self.init_system_catalog()?;
+        self.init_system_catalog().await?;
        let system_records = self.system.information_schema.system.records().await?;
        let entries = self.collect_system_catalog_entries(system_records).await?;
        let max_table_id = self.handle_system_catalog_entries(entries).await?;
@@ -100,31 +106,59 @@ impl LocalCatalogManager {

        // Processing system table hooks
        let mut sys_table_requests = self.system_table_requests.lock().await;
-        handle_system_table_request(self, self.engine.clone(), &mut sys_table_requests).await?;
+        let engine = self
+            .engine_manager
+            .engine(MITO_ENGINE)
+            .context(TableEngineNotFoundSnafu {
+                engine_name: MITO_ENGINE,
+            })?;
+
+        handle_system_table_request(self, engine, &mut sys_table_requests).await?;
        Ok(())
    }

-    fn init_system_catalog(&self) -> Result<()> {
-        let system_schema = Arc::new(MemorySchemaProvider::new());
-        system_schema.register_table(
-            SYSTEM_CATALOG_TABLE_NAME.to_string(),
-            self.system.information_schema.system.clone(),
-        )?;
-        let system_catalog = Arc::new(MemoryCatalogProvider::new());
-        system_catalog.register_schema(INFORMATION_SCHEMA_NAME.to_string(), system_schema)?;
-        self.catalogs
-            .register_catalog(SYSTEM_CATALOG_NAME.to_string(), system_catalog)?;
+    async fn init_system_catalog(&self) -> Result<()> {
+        // register SystemCatalogTable
+        let _ = self
+            .catalogs
+            .register_catalog_sync(SYSTEM_CATALOG_NAME.to_string())?;
+        let _ = self.catalogs.register_schema_sync(RegisterSchemaRequest {
+            catalog: SYSTEM_CATALOG_NAME.to_string(),
+            schema: INFORMATION_SCHEMA_NAME.to_string(),
+        })?;
+        let register_table_req = RegisterTableRequest {
+            catalog: SYSTEM_CATALOG_NAME.to_string(),
+            schema: INFORMATION_SCHEMA_NAME.to_string(),
+            table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
+            table_id: SYSTEM_CATALOG_TABLE_ID,
+            table: self.system.information_schema.system.clone(),
+        };
+        let _ = self.catalogs.register_table(register_table_req).await?;

-        let default_catalog = Arc::new(MemoryCatalogProvider::new());
-        let default_schema = Arc::new(MemorySchemaProvider::new());
+        // register default catalog and default schema
+        let _ = self
+            .catalogs
+            .register_catalog_sync(DEFAULT_CATALOG_NAME.to_string())?;
+        let _ = self.catalogs.register_schema_sync(RegisterSchemaRequest {
+            catalog: DEFAULT_CATALOG_NAME.to_string(),
+            schema: DEFAULT_SCHEMA_NAME.to_string(),
+        })?;

        // Add numbers table for test
-        let table = Arc::new(NumbersTable::default());
-        default_schema.register_table("numbers".to_string(), table)?;
+        let numbers_table = Arc::new(NumbersTable::default());
+        let register_number_table_req = RegisterTableRequest {
+            catalog: DEFAULT_CATALOG_NAME.to_string(),
+            schema: DEFAULT_SCHEMA_NAME.to_string(),
+            table_name: NUMBERS_TABLE_NAME.to_string(),
+            table_id: NUMBERS_TABLE_ID,
+            table: numbers_table,
+        };
+
+        let _ = self
+            .catalogs
+            .register_table(register_number_table_req)
+            .await?;

-        default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema)?;
-        self.catalogs
-            .register_catalog(DEFAULT_CATALOG_NAME.to_string(), default_catalog)?;
        Ok(())
    }

@@ -196,29 +230,26 @@ impl LocalCatalogManager {
        for entry in entries {
            match entry {
                Entry::Catalog(c) => {
-                    self.catalogs.register_catalog_if_absent(
-                        c.catalog_name.clone(),
-                        Arc::new(MemoryCatalogProvider::new()),
-                    );
+                    let _ = self
+                        .catalogs
+                        .register_catalog_if_absent(c.catalog_name.clone());
                    info!("Register catalog: {}", c.catalog_name);
                }
                Entry::Schema(s) => {
-                    let catalog =
-                        self.catalogs
-                            .catalog(&s.catalog_name)?
-                            .context(CatalogNotFoundSnafu {
-                                catalog_name: &s.catalog_name,
-                            })?;
-                    catalog.register_schema(
-                        s.schema_name.clone(),
-                        Arc::new(MemorySchemaProvider::new()),
-                    )?;
+                    let req = RegisterSchemaRequest {
+                        catalog: s.catalog_name.clone(),
+                        schema: s.schema_name.clone(),
+                    };
+                    let _ = self.catalogs.register_schema_sync(req)?;
                    info!("Registered schema: {:?}", s);
                }
                Entry::Table(t) => {
+                    max_table_id = max_table_id.max(t.table_id);
+                    if t.is_deleted {
+                        continue;
+                    }
                    self.open_and_register_table(&t).await?;
                    info!("Registered table: {:?}", t);
-                    max_table_id = max_table_id.max(t.table_id);
                }
            }
        }
@@ -233,30 +264,26 @@ impl LocalCatalogManager {
    }

    async fn open_and_register_table(&self, t: &TableEntry) -> Result<()> {
-        let catalog = self
-            .catalogs
-            .catalog(&t.catalog_name)?
-            .context(CatalogNotFoundSnafu {
-                catalog_name: &t.catalog_name,
-            })?;
-        let schema = catalog
-            .schema(&t.schema_name)?
-            .context(SchemaNotFoundSnafu {
-                catalog: &t.catalog_name,
-                schema: &t.schema_name,
-            })?;
+        self.check_catalog_schema_exist(&t.catalog_name, &t.schema_name)
+            .await?;

        let context = EngineContext {};
-        let request = OpenTableRequest {
+        let open_request = OpenTableRequest {
            catalog_name: t.catalog_name.clone(),
            schema_name: t.schema_name.clone(),
            table_name: t.table_name.clone(),
            table_id: t.table_id,
+            region_numbers: vec![0],
        };
+        let engine = self
+            .engine_manager
+            .engine(&t.engine)
+            .context(TableEngineNotFoundSnafu {
+                engine_name: &t.engine,
+            })?;

-        let option = self
-            .engine
-            .open_table(&context, request)
+        let table_ref = engine
+            .open_table(&context, open_request)
            .await
            .with_context(|_| OpenTableSnafu {
                table_info: format!(
@@ -271,36 +298,49 @@ impl LocalCatalogManager {
                ),
            })?;

-        schema.register_table(t.table_name.clone(), option)?;
+        let register_request = RegisterTableRequest {
+            catalog: t.catalog_name.clone(),
+            schema: t.schema_name.clone(),
+            table_name: t.table_name.clone(),
+            table_id: t.table_id,
+            table: table_ref,
+        };
+        let _ = self.catalogs.register_table(register_request).await?;
+
        Ok(())
    }
-}

-impl CatalogList for LocalCatalogManager {
-    fn as_any(&self) -> &dyn Any {
-        self
+    async fn check_state(&self) -> Result<()> {
+        let started = self.init_lock.lock().await;
+        ensure!(
+            *started,
+            IllegalManagerStateSnafu {
+                msg: "Catalog manager not started",
+            }
+        );
+        Ok(())
    }

-    fn register_catalog(
+    async fn check_catalog_schema_exist(
        &self,
-        name: String,
-        catalog: CatalogProviderRef,
-    ) -> Result<Option<CatalogProviderRef>> {
-        self.catalogs.register_catalog(name, catalog)
-    }
-
-    fn catalog_names(&self) -> Result<Vec<String>> {
-        let mut res = self.catalogs.catalog_names()?;
-        res.push(SYSTEM_CATALOG_NAME.to_string());
-        Ok(res)
-    }
-
-    fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
-        if name.eq_ignore_ascii_case(SYSTEM_CATALOG_NAME) {
-            Ok(Some(self.system.clone()))
-        } else {
-            self.catalogs.catalog(name)
+        catalog_name: &str,
+        schema_name: &str,
+    ) -> Result<()> {
+        if !self.catalogs.catalog_exist(catalog_name).await? {
+            return CatalogNotFoundSnafu { catalog_name }.fail()?;
        }
+        if !self
+            .catalogs
+            .schema_exist(catalog_name, schema_name)
+            .await?
+        {
+            return SchemaNotFoundSnafu {
+                catalog: catalog_name,
+                schema: schema_name,
+            }
+            .fail()?;
+        }
+        Ok(())
    }
 }

@@ -320,32 +360,21 @@ impl CatalogManager for LocalCatalogManager {
    }

    async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
-        let started = self.init_lock.lock().await;
+        self.check_state().await?;

-        ensure!(
-            *started,
-            IllegalManagerStateSnafu {
-                msg: "Catalog manager not started",
-            }
-        );
+        let catalog_name = request.catalog.clone();
+        let schema_name = request.schema.clone();

-        let catalog_name = &request.catalog;
-        let schema_name = &request.schema;
-
-        let catalog = self
-            .catalogs
-            .catalog(catalog_name)?
-            .context(CatalogNotFoundSnafu { catalog_name })?;
-        let schema = catalog
-            .schema(schema_name)?
-            .with_context(|| SchemaNotFoundSnafu {
-                catalog: catalog_name,
-                schema: schema_name,
-            })?;
+        self.check_catalog_schema_exist(&catalog_name, &schema_name)
+            .await?;

        {
            let _lock = self.register_lock.lock().await;
-            if let Some(existing) = schema.table(&request.table_name).await? {
+            if let Some(existing) = self
+                .catalogs
+                .table(&request.catalog, &request.schema, &request.table_name)
+                .await?
+            {
                if existing.table_info().ident.table_id != request.table_id {
                    error!(
                        "Unexpected table register request: {:?}, existing: {:?}",
@@ -354,8 +383,8 @@ impl CatalogManager for LocalCatalogManager {
                    );
                    return TableExistsSnafu {
                        table: format_full_table_name(
-                            catalog_name,
-                            schema_name,
+                            &catalog_name,
+                            &schema_name,
                            &request.table_name,
                        ),
                    }
@@ -365,64 +394,75 @@ impl CatalogManager for LocalCatalogManager {
                Ok(false)
            } else {
                // table does not exist
-                self.system
+                let engine = request.table.table_info().meta.engine.to_string();
+                let table_name = request.table_name.clone();
+                let table_id = request.table_id;
+                let _ = self.catalogs.register_table(request).await?;
+                let _ = self
+                    .system
                    .register_table(
                        catalog_name.clone(),
                        schema_name.clone(),
-                        request.table_name.clone(),
-                        request.table_id,
+                        table_name,
+                        table_id,
+                        engine,
                    )
                    .await?;
-                schema.register_table(request.table_name, request.table)?;
+                increment_gauge!(
+                    crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
+                    1.0,
+                    &[crate::metrics::db_label(&catalog_name, &schema_name)],
+                );
                Ok(true)
            }
        }
    }

    async fn rename_table(&self, request: RenameTableRequest) -> Result<bool> {
-        let started = self.init_lock.lock().await;
-
-        ensure!(
-            *started,
-            IllegalManagerStateSnafu {
-                msg: "Catalog manager not started",
-            }
-        );
+        self.check_state().await?;

        let catalog_name = &request.catalog;
        let schema_name = &request.schema;

-        let catalog = self
-            .catalogs
-            .catalog(catalog_name)?
-            .context(CatalogNotFoundSnafu { catalog_name })?;
+        self.check_catalog_schema_exist(catalog_name, schema_name)
+            .await?;
+        ensure!(
+            self.catalogs
+                .table(catalog_name, schema_name, &request.new_table_name)
+                .await?
+                .is_none(),
+            TableExistsSnafu {
+                table: &request.new_table_name
+            }
+        );

-        let schema = catalog
-            .schema(schema_name)?
-            .with_context(|| SchemaNotFoundSnafu {
-                catalog: catalog_name,
-                schema: schema_name,
+        let _lock = self.register_lock.lock().await;
+        let old_table = self
+            .catalogs
+            .table(catalog_name, schema_name, &request.table_name)
+            .await?
+            .context(TableNotExistSnafu {
+                table: &request.table_name,
            })?;

+        let engine = old_table.table_info().meta.engine.to_string();
        // rename table in system catalog
-        self.system
+        let _ = self
+            .system
            .register_table(
                catalog_name.clone(),
                schema_name.clone(),
                request.new_table_name.clone(),
                request.table_id,
+                engine,
            )
            .await?;
-        Ok(schema
-            .rename_table(&request.table_name, request.new_table_name)
-            .is_ok())
+
+        self.catalogs.rename_table(request).await
    }

-    async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool> {
-        {
-            let started = *self.init_lock.lock().await;
-            ensure!(started, IllegalManagerStateSnafu { msg: "not started" });
-        }
+    async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<()> {
+        self.check_state().await?;

        {
            let _ = self.register_lock.lock().await;
@@ -443,67 +483,63 @@ impl CatalogManager for LocalCatalogManager {
                .ident
                .table_id;

-            if !self.system.deregister_table(&request, table_id).await? {
-                return Ok(false);
-            }
-
+            self.system.deregister_table(&request, table_id).await?;
            self.catalogs.deregister_table(request).await
        }
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
-        let started = self.init_lock.lock().await;
-        ensure!(
-            *started,
-            IllegalManagerStateSnafu {
-                msg: "Catalog manager not started",
-            }
-        );
+        self.check_state().await?;
+
        let catalog_name = &request.catalog;
        let schema_name = &request.schema;

-        let catalog = self
-            .catalogs
-            .catalog(catalog_name)?
-            .context(CatalogNotFoundSnafu { catalog_name })?;
+        if !self.catalogs.catalog_exist(catalog_name).await? {
+            return CatalogNotFoundSnafu { catalog_name }.fail()?;
+        }

        {
            let _lock = self.register_lock.lock().await;
            ensure!(
-                catalog.schema(schema_name)?.is_none(),
+                !self
+                    .catalogs
+                    .schema_exist(catalog_name, schema_name)
+                    .await?,
                SchemaExistsSnafu {
                    schema: schema_name,
                }
            );
-            self.system
-                .register_schema(request.catalog, schema_name.clone())
+            let _ = self
+                .system
+                .register_schema(request.catalog.clone(), schema_name.clone())
                .await?;
-            catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
-            Ok(true)
+            self.catalogs.register_schema_sync(request)
        }
    }

+    async fn deregister_schema(&self, _request: DeregisterSchemaRequest) -> Result<bool> {
+        UnimplementedSnafu {
+            operation: "deregister schema",
+        }
+        .fail()
+    }
+
    async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
-        ensure!(
-            !*self.init_lock.lock().await,
-            IllegalManagerStateSnafu {
-                msg: "Catalog manager already started",
-            }
-        );
+        let catalog_name = request.create_table_request.catalog_name.clone();
+        let schema_name = request.create_table_request.schema_name.clone();

        let mut sys_table_requests = self.system_table_requests.lock().await;
        sys_table_requests.push(request);
-
+        increment_gauge!(
+            crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
+            1.0,
+            &[crate::metrics::db_label(&catalog_name, &schema_name)],
+        );
        Ok(())
    }

-    fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>> {
-        self.catalogs
-            .catalog(catalog)?
-            .context(CatalogNotFoundSnafu {
-                catalog_name: catalog,
-            })?
-            .schema(schema)
+    async fn schema_exist(&self, catalog: &str, schema: &str) -> Result<bool> {
+        self.catalogs.schema_exist(catalog, schema).await
    }

    async fn table(
@@ -512,17 +548,48 @@ impl CatalogManager for LocalCatalogManager {
        schema_name: &str,
        table_name: &str,
    ) -> Result<Option<TableRef>> {
-        let catalog = self
-            .catalogs
-            .catalog(catalog_name)?
-            .context(CatalogNotFoundSnafu { catalog_name })?;
-        let schema = catalog
-            .schema(schema_name)?
-            .with_context(|| SchemaNotFoundSnafu {
-                catalog: catalog_name,
-                schema: schema_name,
-            })?;
-        schema.table(table_name).await
+        if schema_name == INFORMATION_SCHEMA_NAME {
+            let manager: CatalogManagerRef = self.catalogs.clone() as _;
+            let provider =
+                InformationSchemaProvider::new(catalog_name.to_string(), Arc::downgrade(&manager));
+            return provider.table(table_name);
+        }
+
+        self.catalogs
+            .table(catalog_name, schema_name, table_name)
+            .await
+    }
+
+    async fn catalog_exist(&self, catalog: &str) -> Result<bool> {
+        if catalog.eq_ignore_ascii_case(SYSTEM_CATALOG_NAME) {
+            Ok(true)
+        } else {
+            self.catalogs.catalog_exist(catalog).await
+        }
+    }
+
+    async fn table_exist(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
+        self.catalogs.table_exist(catalog, schema, table).await
+    }
+
+    async fn catalog_names(&self) -> Result<Vec<String>> {
+        self.catalogs.catalog_names().await
+    }
+
+    async fn schema_names(&self, catalog_name: &str) -> Result<Vec<String>> {
+        self.catalogs.schema_names(catalog_name).await
+    }
+
+    async fn table_names(&self, catalog_name: &str, schema_name: &str) -> Result<Vec<String>> {
+        self.catalogs.table_names(catalog_name, schema_name).await
+    }
+
+    async fn register_catalog(&self, name: String) -> Result<bool> {
+        self.catalogs.register_catalog(name).await
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
    }
 }

@@ -530,6 +597,8 @@ impl CatalogManager for LocalCatalogManager {
 mod tests {
    use std::assert_matches::assert_matches;

+    use mito::engine::MITO_ENGINE;
+
    use super::*;
    use crate::system::{CatalogEntry, SchemaEntry};

@@ -541,6 +610,8 @@ mod tests {
                schema_name: "S1".to_string(),
                table_name: "T1".to_string(),
                table_id: 1,
+                engine: MITO_ENGINE.to_string(),
+                is_deleted: false,
            }),
            Entry::Catalog(CatalogEntry {
                catalog_name: "C2".to_string(),
@@ -561,6 +632,8 @@ mod tests {
                schema_name: "S1".to_string(),
                table_name: "T2".to_string(),
                table_id: 2,
+                engine: MITO_ENGINE.to_string(),
+                is_deleted: false,
            }),
        ];
        let res = LocalCatalogManager::sort_entries(vec);
--- a/src/catalog/src/local/memory.rs
+++ b/src/catalog/src/local/memory.rs
@@ -18,28 +18,27 @@ use std::collections::HashMap;
 use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::{Arc, RwLock};

-use async_trait::async_trait;
-use common_catalog::consts::MIN_USER_TABLE_ID;
-use common_telemetry::error;
-use snafu::{ensure, OptionExt};
+use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
+use metrics::{decrement_gauge, increment_gauge};
+use snafu::OptionExt;
 use table::metadata::TableId;
 use table::table::TableIdProvider;
 use table::TableRef;

 use crate::error::{
-    self, CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu, TableNotFoundSnafu,
+    CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu, TableNotFoundSnafu,
 };
-use crate::schema::SchemaProvider;
 use crate::{
-    CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef, DeregisterTableRequest,
-    RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, RenameTableRequest,
-    SchemaProviderRef,
+    CatalogManager, DeregisterSchemaRequest, DeregisterTableRequest, RegisterSchemaRequest,
+    RegisterSystemTableRequest, RegisterTableRequest, RenameTableRequest,
 };

+type SchemaEntries = HashMap<String, HashMap<String, TableRef>>;
+
 /// Simple in-memory list of catalogs
 pub struct MemoryCatalogManager {
    /// Collection of catalogs containing schemas and ultimately Tables
-    pub catalogs: RwLock<HashMap<String, CatalogProviderRef>>,
+    pub catalogs: RwLock<HashMap<String, SchemaEntries>>,
    pub table_id: AtomicU32,
 }

@@ -49,13 +48,14 @@ impl Default for MemoryCatalogManager {
            table_id: AtomicU32::new(MIN_USER_TABLE_ID),
            catalogs: Default::default(),
        };
-        let default_catalog = Arc::new(MemoryCatalogProvider::new());
-        manager
-            .register_catalog("greptime".to_string(), default_catalog.clone())
-            .unwrap();
-        default_catalog
-            .register_schema("public".to_string(), Arc::new(MemorySchemaProvider::new()))
-            .unwrap();
+
+        let catalog = HashMap::from([(DEFAULT_SCHEMA_NAME.to_string(), HashMap::new())]);
+        let _ = manager
+            .catalogs
+            .write()
+            .unwrap()
+            .insert(DEFAULT_CATALOG_NAME.to_string(), catalog);
+
        manager
    }
 }
@@ -75,70 +75,94 @@ impl CatalogManager for MemoryCatalogManager {
    }

    async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
-        let catalogs = self.catalogs.write().unwrap();
-        let catalog = catalogs
-            .get(&request.catalog)
-            .context(CatalogNotFoundSnafu {
-                catalog_name: &request.catalog,
-            })?
-            .clone();
-        let schema = catalog
-            .schema(&request.schema)?
-            .with_context(|| SchemaNotFoundSnafu {
-                catalog: &request.catalog,
-                schema: &request.schema,
-            })?;
-        schema
-            .register_table(request.table_name, request.table)
-            .map(|v| v.is_none())
+        self.register_table_sync(request)
    }

    async fn rename_table(&self, request: RenameTableRequest) -> Result<bool> {
-        let catalogs = self.catalogs.write().unwrap();
-        let catalog = catalogs
-            .get(&request.catalog)
-            .context(CatalogNotFoundSnafu {
+        let mut catalogs = self.catalogs.write().unwrap();
+        let schema = catalogs
+            .get_mut(&request.catalog)
+            .with_context(|| CatalogNotFoundSnafu {
                catalog_name: &request.catalog,
            })?
-            .clone();
-        let schema = catalog
-            .schema(&request.schema)?
+            .get_mut(&request.schema)
            .with_context(|| SchemaNotFoundSnafu {
                catalog: &request.catalog,
                schema: &request.schema,
            })?;
-        Ok(schema
-            .rename_table(&request.table_name, request.new_table_name)
-            .is_ok())
+
+        // check old and new table names
+        if !schema.contains_key(&request.table_name) {
+            return TableNotFoundSnafu {
+                table_info: request.table_name.to_string(),
+            }
+            .fail()?;
+        }
+        if schema.contains_key(&request.new_table_name) {
+            return TableExistsSnafu {
+                table: &request.new_table_name,
+            }
+            .fail();
+        }
+
+        let table = schema.remove(&request.table_name).unwrap();
+        let _ = schema.insert(request.new_table_name, table);
+
+        Ok(true)
    }

-    async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool> {
-        let catalogs = self.catalogs.write().unwrap();
-        let catalog = catalogs
-            .get(&request.catalog)
-            .context(CatalogNotFoundSnafu {
+    async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<()> {
+        let mut catalogs = self.catalogs.write().unwrap();
+        let schema = catalogs
+            .get_mut(&request.catalog)
+            .with_context(|| CatalogNotFoundSnafu {
                catalog_name: &request.catalog,
            })?
-            .clone();
-        let schema = catalog
-            .schema(&request.schema)?
+            .get_mut(&request.schema)
            .with_context(|| SchemaNotFoundSnafu {
                catalog: &request.catalog,
                schema: &request.schema,
            })?;
-        schema
-            .deregister_table(&request.table_name)
-            .map(|v| v.is_some())
+        let result = schema.remove(&request.table_name);
+        if result.is_some() {
+            decrement_gauge!(
+                crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
+                1.0,
+                &[crate::metrics::db_label(&request.catalog, &request.schema)],
+            );
+        }
+        Ok(())
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
-        let catalogs = self.catalogs.write().unwrap();
-        let catalog = catalogs
-            .get(&request.catalog)
-            .context(CatalogNotFoundSnafu {
+        self.register_schema_sync(request)
+    }
+
+    async fn deregister_schema(&self, request: DeregisterSchemaRequest) -> Result<bool> {
+        let mut catalogs = self.catalogs.write().unwrap();
+        let schemas = catalogs
+            .get_mut(&request.catalog)
+            .with_context(|| CatalogNotFoundSnafu {
                catalog_name: &request.catalog,
            })?;
-        catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
+        let table_count = schemas
+            .remove(&request.schema)
+            .with_context(|| SchemaNotFoundSnafu {
+                catalog: &request.catalog,
+                schema: &request.schema,
+            })?
+            .len();
+        decrement_gauge!(
+            crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
+            table_count as f64,
+            &[crate::metrics::db_label(&request.catalog, &request.schema)],
+        );
+
+        decrement_gauge!(
+            crate::metrics::METRIC_CATALOG_MANAGER_SCHEMA_COUNT,
+            1.0,
+            &[crate::metrics::db_label(&request.catalog, &request.schema)],
+        );
        Ok(true)
    }

@@ -147,13 +171,16 @@ impl CatalogManager for MemoryCatalogManager {
        Ok(())
    }

-    fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>> {
-        let catalogs = self.catalogs.read().unwrap();
-        if let Some(c) = catalogs.get(catalog) {
-            c.schema(schema)
-        } else {
-            Ok(None)
-        }
+    async fn schema_exist(&self, catalog: &str, schema: &str) -> Result<bool> {
+        Ok(self
+            .catalogs
+            .read()
+            .unwrap()
+            .get(catalog)
+            .with_context(|| CatalogNotFoundSnafu {
+                catalog_name: catalog,
+            })?
+            .contains_key(schema))
    }

    async fn table(
@@ -162,193 +189,166 @@ impl CatalogManager for MemoryCatalogManager {
        schema: &str,
        table_name: &str,
    ) -> Result<Option<TableRef>> {
-        let catalog = {
-            let c = self.catalogs.read().unwrap();
-            let Some(c) = c.get(catalog) else { return Ok(None) };
-            c.clone()
+        let result = try {
+            self.catalogs
+                .read()
+                .unwrap()
+                .get(catalog)?
+                .get(schema)?
+                .get(table_name)
+                .cloned()?
        };
-        match catalog.schema(schema)? {
-            None => Ok(None),
-            Some(s) => s.table(table_name).await,
-        }
+        Ok(result)
+    }
+
+    async fn catalog_exist(&self, catalog: &str) -> Result<bool> {
+        Ok(self.catalogs.read().unwrap().get(catalog).is_some())
+    }
+
+    async fn table_exist(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
+        let catalogs = self.catalogs.read().unwrap();
+        Ok(catalogs
+            .get(catalog)
+            .with_context(|| CatalogNotFoundSnafu {
+                catalog_name: catalog,
+            })?
+            .get(schema)
+            .with_context(|| SchemaNotFoundSnafu { catalog, schema })?
+            .contains_key(table))
+    }
+
+    async fn catalog_names(&self) -> Result<Vec<String>> {
+        Ok(self.catalogs.read().unwrap().keys().cloned().collect())
+    }
+
+    async fn schema_names(&self, catalog_name: &str) -> Result<Vec<String>> {
+        Ok(self
+            .catalogs
+            .read()
+            .unwrap()
+            .get(catalog_name)
+            .with_context(|| CatalogNotFoundSnafu { catalog_name })?
+            .keys()
+            .cloned()
+            .collect())
+    }
+
+    async fn table_names(&self, catalog_name: &str, schema_name: &str) -> Result<Vec<String>> {
+        Ok(self
+            .catalogs
+            .read()
+            .unwrap()
+            .get(catalog_name)
+            .with_context(|| CatalogNotFoundSnafu { catalog_name })?
+            .get(schema_name)
+            .with_context(|| SchemaNotFoundSnafu {
+                catalog: catalog_name,
+                schema: schema_name,
+            })?
+            .keys()
+            .cloned()
+            .collect())
+    }
+
+    async fn register_catalog(&self, name: String) -> Result<bool> {
+        self.register_catalog_sync(name)
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
    }
 }

 impl MemoryCatalogManager {
-    /// Registers a catalog and return `None` if no catalog with the same name was already
-    /// registered, or `Some` with the previously registered catalog.
-    pub fn register_catalog_if_absent(
-        &self,
-        name: String,
-        catalog: CatalogProviderRef,
-    ) -> Option<CatalogProviderRef> {
+    /// Registers the catalog if absent and returns `true` if it was already registered.
+    pub fn register_catalog_if_absent(&self, name: String) -> bool {
        let mut catalogs = self.catalogs.write().unwrap();
        let entry = catalogs.entry(name);
        match entry {
-            Entry::Occupied(v) => Some(v.get().clone()),
+            Entry::Occupied(_) => true,
            Entry::Vacant(v) => {
-                v.insert(catalog);
-                None
+                let _ = v.insert(HashMap::new());
+                false
            }
        }
    }
-}

-impl CatalogList for MemoryCatalogManager {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn register_catalog(
-        &self,
-        name: String,
-        catalog: CatalogProviderRef,
-    ) -> Result<Option<CatalogProviderRef>> {
+    pub fn register_catalog_sync(&self, name: String) -> Result<bool> {
        let mut catalogs = self.catalogs.write().unwrap();
-        Ok(catalogs.insert(name, catalog))
-    }

-    fn catalog_names(&self) -> Result<Vec<String>> {
-        let catalogs = self.catalogs.read().unwrap();
-        Ok(catalogs.keys().map(|s| s.to_string()).collect())
-    }
-
-    fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
-        let catalogs = self.catalogs.read().unwrap();
-        Ok(catalogs.get(name).cloned())
-    }
-}
-
-impl Default for MemoryCatalogProvider {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-/// Simple in-memory implementation of a catalog.
-pub struct MemoryCatalogProvider {
-    schemas: RwLock<HashMap<String, Arc<dyn SchemaProvider>>>,
-}
-
-impl MemoryCatalogProvider {
-    /// Instantiates a new MemoryCatalogProvider with an empty collection of schemas.
-    pub fn new() -> Self {
-        Self {
-            schemas: RwLock::new(HashMap::new()),
+        match catalogs.entry(name) {
+            Entry::Vacant(e) => {
+                e.insert(HashMap::new());
+                increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_CATALOG_COUNT, 1.0);
+                Ok(true)
+            }
+            Entry::Occupied(_) => Ok(false),
        }
    }
-}

-impl CatalogProvider for MemoryCatalogProvider {
-    fn as_any(&self) -> &dyn Any {
-        self
+    pub fn register_schema_sync(&self, request: RegisterSchemaRequest) -> Result<bool> {
+        let mut catalogs = self.catalogs.write().unwrap();
+        let catalog = catalogs
+            .get_mut(&request.catalog)
+            .with_context(|| CatalogNotFoundSnafu {
+                catalog_name: &request.catalog,
+            })?;
+
+        match catalog.entry(request.schema) {
+            Entry::Vacant(e) => {
+                e.insert(HashMap::new());
+                increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_SCHEMA_COUNT, 1.0);
+                Ok(true)
+            }
+            Entry::Occupied(_) => Ok(false),
+        }
    }

-    fn schema_names(&self) -> Result<Vec<String>> {
-        let schemas = self.schemas.read().unwrap();
-        Ok(schemas.keys().cloned().collect())
-    }
+    pub fn register_table_sync(&self, request: RegisterTableRequest) -> Result<bool> {
+        let mut catalogs = self.catalogs.write().unwrap();
+        let schema = catalogs
+            .get_mut(&request.catalog)
+            .with_context(|| CatalogNotFoundSnafu {
+                catalog_name: &request.catalog,
+            })?
+            .get_mut(&request.schema)
+            .with_context(|| SchemaNotFoundSnafu {
+                catalog: &request.catalog,
+                schema: &request.schema,
+            })?;

-    fn register_schema(
-        &self,
-        name: String,
-        schema: SchemaProviderRef,
-    ) -> Result<Option<SchemaProviderRef>> {
-        let mut schemas = self.schemas.write().unwrap();
-        ensure!(
-            !schemas.contains_key(&name),
-            error::SchemaExistsSnafu { schema: &name }
+        if schema.contains_key(&request.table_name) {
+            return TableExistsSnafu {
+                table: &request.table_name,
+            }
+            .fail();
+        }
+        schema.insert(request.table_name, request.table);
+        increment_gauge!(
+            crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
+            1.0,
+            &[crate::metrics::db_label(&request.catalog, &request.schema)],
        );
-        Ok(schemas.insert(name, schema))
+        Ok(true)
    }

-    fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>> {
-        let schemas = self.schemas.read().unwrap();
-        Ok(schemas.get(name).cloned())
-    }
-}
-
-/// Simple in-memory implementation of a schema.
-pub struct MemorySchemaProvider {
-    tables: RwLock<HashMap<String, TableRef>>,
-}
-
-impl MemorySchemaProvider {
-    /// Instantiates a new MemorySchemaProvider with an empty collection of tables.
-    pub fn new() -> Self {
-        Self {
-            tables: RwLock::new(HashMap::new()),
-        }
-    }
-}
-
-impl Default for MemorySchemaProvider {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-#[async_trait]
-impl SchemaProvider for MemorySchemaProvider {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn table_names(&self) -> Result<Vec<String>> {
-        let tables = self.tables.read().unwrap();
-        Ok(tables.keys().cloned().collect())
-    }
-
-    async fn table(&self, name: &str) -> Result<Option<TableRef>> {
-        let tables = self.tables.read().unwrap();
-        Ok(tables.get(name).cloned())
-    }
-
-    fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
-        let mut tables = self.tables.write().unwrap();
-        if let Some(existing) = tables.get(name.as_str()) {
-            // if table with the same name but different table id exists, then it's a fatal bug
-            if existing.table_info().ident.table_id != table.table_info().ident.table_id {
-                error!(
-                    "Unexpected table register: {:?}, existing: {:?}",
-                    table.table_info(),
-                    existing.table_info()
-                );
-                return TableExistsSnafu { table: name }.fail()?;
-            }
-            Ok(Some(existing.clone()))
-        } else {
-            Ok(tables.insert(name, table))
-        }
-    }
-
-    fn rename_table(&self, name: &str, new_name: String) -> Result<TableRef> {
-        let mut tables = self.tables.write().unwrap();
-        if tables.get(name).is_some() {
-            let table = tables.remove(name).unwrap();
-            tables.insert(new_name, table.clone());
-            Ok(table)
-        } else {
-            TableNotFoundSnafu {
-                table_info: name.to_string(),
-            }
-            .fail()?
-        }
-    }
-
-    fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
-        let mut tables = self.tables.write().unwrap();
-        Ok(tables.remove(name))
-    }
-
-    fn table_exist(&self, name: &str) -> Result<bool> {
-        let tables = self.tables.read().unwrap();
-        Ok(tables.contains_key(name))
+    #[cfg(any(test, feature = "testing"))]
+    pub fn new_with_table(table: TableRef) -> Self {
+        let manager = Self::default();
+        let request = RegisterTableRequest {
+            catalog: DEFAULT_CATALOG_NAME.to_string(),
+            schema: DEFAULT_SCHEMA_NAME.to_string(),
+            table_name: table.table_info().name.clone(),
+            table_id: table.table_info().ident.table_id,
+            table,
+        };
+        let _ = manager.register_table_sync(request).unwrap();
+        manager
    }
 }

 /// Create a memory catalog list contains a numbers table for test
-pub fn new_memory_catalog_list() -> Result<Arc<MemoryCatalogManager>> {
+pub fn new_memory_catalog_manager() -> Result<Arc<MemoryCatalogManager>> {
    Ok(Arc::new(MemoryCatalogManager::default()))
 }

@@ -356,83 +356,100 @@ pub fn new_memory_catalog_list() -> Result<Arc<MemoryCatalogManager>> {
 mod tests {
    use common_catalog::consts::*;
    use common_error::ext::ErrorExt;
-    use common_error::prelude::StatusCode;
-    use table::table::numbers::NumbersTable;
+    use common_error::status_code::StatusCode;
+    use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};

    use super::*;

    #[tokio::test]
    async fn test_new_memory_catalog_list() {
-        let catalog_list = new_memory_catalog_list().unwrap();
-        let default_catalog = catalog_list.catalog(DEFAULT_CATALOG_NAME).unwrap().unwrap();
+        let catalog_list = new_memory_catalog_manager().unwrap();

-        let default_schema = default_catalog
-            .schema(DEFAULT_SCHEMA_NAME)
-            .unwrap()
+        let register_request = RegisterTableRequest {
+            catalog: DEFAULT_CATALOG_NAME.to_string(),
+            schema: DEFAULT_SCHEMA_NAME.to_string(),
+            table_name: NUMBERS_TABLE_NAME.to_string(),
+            table_id: NUMBERS_TABLE_ID,
+            table: Arc::new(NumbersTable::default()),
+        };
+
+        let _ = catalog_list.register_table(register_request).await.unwrap();
+        let table = catalog_list
+            .table(
+                DEFAULT_CATALOG_NAME,
+                DEFAULT_SCHEMA_NAME,
+                NUMBERS_TABLE_NAME,
+            )
+            .await
            .unwrap();
-
-        default_schema
-            .register_table("numbers".to_string(), Arc::new(NumbersTable::default()))
-            .unwrap();
-
-        let table = default_schema.table("numbers").await.unwrap();
-        assert!(table.is_some());
-        assert!(default_schema.table("not_exists").await.unwrap().is_none());
-    }
-
-    #[tokio::test]
-    async fn test_mem_provider() {
-        let provider = MemorySchemaProvider::new();
-        let table_name = "numbers";
-        assert!(!provider.table_exist(table_name).unwrap());
-        assert!(provider.deregister_table(table_name).unwrap().is_none());
-        let test_table = NumbersTable::default();
-        // register table successfully
-        assert!(provider
-            .register_table(table_name.to_string(), Arc::new(test_table))
+        let _ = table.unwrap();
+        assert!(catalog_list
+            .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "not_exists")
+            .await
            .unwrap()
            .is_none());
-        assert!(provider.table_exist(table_name).unwrap());
-        let other_table = NumbersTable::new(12);
-        let result = provider.register_table(table_name.to_string(), Arc::new(other_table));
-        let err = result.err().unwrap();
-        assert!(err.backtrace_opt().is_some());
-        assert_eq!(StatusCode::TableAlreadyExists, err.status_code());
    }

    #[tokio::test]
-    async fn test_mem_provider_rename_table() {
-        let provider = MemorySchemaProvider::new();
-        let table_name = "num";
-        assert!(!provider.table_exist(table_name).unwrap());
-        let test_table: TableRef = Arc::new(NumbersTable::default());
+    async fn test_mem_manager_rename_table() {
+        let catalog = MemoryCatalogManager::default();
+        let table_name = "test_table";
+        assert!(!catalog
+            .table_exist(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
+            .await
+            .unwrap());
        // register test table
-        assert!(provider
-            .register_table(table_name.to_string(), test_table.clone())
-            .unwrap()
-            .is_none());
-        assert!(provider.table_exist(table_name).unwrap());
+        let table_id = 2333;
+        let register_request = RegisterTableRequest {
+            catalog: DEFAULT_CATALOG_NAME.to_string(),
+            schema: DEFAULT_SCHEMA_NAME.to_string(),
+            table_name: table_name.to_string(),
+            table_id,
+            table: Arc::new(NumbersTable::new(table_id)),
+        };
+        assert!(catalog.register_table(register_request).await.unwrap());
+        assert!(catalog
+            .table_exist(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
+            .await
+            .unwrap());

        // rename test table
-        let new_table_name = "numbers";
-        provider
-            .rename_table(table_name, new_table_name.to_string())
-            .unwrap();
+        let new_table_name = "test_table_renamed";
+        let rename_request = RenameTableRequest {
+            catalog: DEFAULT_CATALOG_NAME.to_string(),
+            schema: DEFAULT_SCHEMA_NAME.to_string(),
+            table_name: table_name.to_string(),
+            new_table_name: new_table_name.to_string(),
+            table_id,
+        };
+        let _ = catalog.rename_table(rename_request).await.unwrap();

        // test old table name not exist
-        assert!(!provider.table_exist(table_name).unwrap());
-        assert!(provider.deregister_table(table_name).unwrap().is_none());
+        assert!(!catalog
+            .table_exist(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
+            .await
+            .unwrap());

        // test new table name exists
-        assert!(provider.table_exist(new_table_name).unwrap());
-        let registered_table = provider.table(new_table_name).await.unwrap().unwrap();
-        assert_eq!(
-            registered_table.table_info().ident.table_id,
-            test_table.table_info().ident.table_id
-        );
+        assert!(catalog
+            .table_exist(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
+            .await
+            .unwrap());
+        let registered_table = catalog
+            .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
+            .await
+            .unwrap()
+            .unwrap();
+        assert_eq!(registered_table.table_info().ident.table_id, table_id);

-        let other_table = Arc::new(NumbersTable::new(2));
-        let result = provider.register_table(new_table_name.to_string(), other_table);
+        let dup_register_request = RegisterTableRequest {
+            catalog: DEFAULT_CATALOG_NAME.to_string(),
+            schema: DEFAULT_SCHEMA_NAME.to_string(),
+            table_name: new_table_name.to_string(),
+            table_id: table_id + 1,
+            table: Arc::new(NumbersTable::new(table_id + 1)),
+        };
+        let result = catalog.register_table(dup_register_request).await;
        let err = result.err().unwrap();
        assert_eq!(StatusCode::TableAlreadyExists, err.status_code());
    }
@@ -440,15 +457,11 @@ mod tests {
    #[tokio::test]
    async fn test_catalog_rename_table() {
        let catalog = MemoryCatalogManager::default();
-        let schema = catalog
-            .schema(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
-            .unwrap()
-            .unwrap();
-
-        // register table
        let table_name = "num";
        let table_id = 2333;
        let table: TableRef = Arc::new(NumbersTable::new(table_id));
+
+        // register table
        let register_table_req = RegisterTableRequest {
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
@@ -457,10 +470,14 @@ mod tests {
            table,
        };
        assert!(catalog.register_table(register_table_req).await.unwrap());
-        assert!(schema.table_exist(table_name).unwrap());
+        assert!(catalog
+            .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
+            .await
+            .unwrap()
+            .is_some());

        // rename table
-        let new_table_name = "numbers";
+        let new_table_name = "numbers_new";
        let rename_table_req = RenameTableRequest {
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
@@ -469,8 +486,16 @@ mod tests {
            table_id,
        };
        assert!(catalog.rename_table(rename_table_req).await.unwrap());
-        assert!(!schema.table_exist(table_name).unwrap());
-        assert!(schema.table_exist(new_table_name).unwrap());
+        assert!(catalog
+            .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
+            .await
+            .unwrap()
+            .is_none());
+        assert!(catalog
+            .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
+            .await
+            .unwrap()
+            .is_some());

        let registered_table = catalog
            .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
@@ -483,49 +508,80 @@ mod tests {
    #[test]
    pub fn test_register_if_absent() {
        let list = MemoryCatalogManager::default();
-        assert!(list
-            .register_catalog_if_absent(
-                "test_catalog".to_string(),
-                Arc::new(MemoryCatalogProvider::new())
-            )
-            .is_none());
-        list.register_catalog_if_absent(
-            "test_catalog".to_string(),
-            Arc::new(MemoryCatalogProvider::new()),
-        )
-        .unwrap();
-        list.as_any()
-            .downcast_ref::<MemoryCatalogManager>()
-            .unwrap();
+        assert!(!list.register_catalog_if_absent("test_catalog".to_string(),));
+        assert!(list.register_catalog_if_absent("test_catalog".to_string()));
    }

    #[tokio::test]
    pub async fn test_catalog_deregister_table() {
        let catalog = MemoryCatalogManager::default();
-        let schema = catalog
-            .schema(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
-            .unwrap()
-            .unwrap();
+        let table_name = "foo_table";

        let register_table_req = RegisterTableRequest {
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
-            table_name: "numbers".to_string(),
+            table_name: table_name.to_string(),
            table_id: 2333,
            table: Arc::new(NumbersTable::default()),
        };
-        catalog.register_table(register_table_req).await.unwrap();
-        assert!(schema.table_exist("numbers").unwrap());
+        let _ = catalog.register_table(register_table_req).await.unwrap();
+        assert!(catalog
+            .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
+            .await
+            .unwrap()
+            .is_some());

        let deregister_table_req = DeregisterTableRequest {
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
-            table_name: "numbers".to_string(),
+            table_name: table_name.to_string(),
        };
        catalog
            .deregister_table(deregister_table_req)
            .await
            .unwrap();
-        assert!(!schema.table_exist("numbers").unwrap());
+        assert!(catalog
+            .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
+            .await
+            .unwrap()
+            .is_none());
+    }
+
+    #[tokio::test]
+    async fn test_catalog_deregister_schema() {
+        let catalog = MemoryCatalogManager::default();
+
+        // Set up a dedicated catalog with one schema that holds one table.
+        let catalog_name = "foo_catalog".to_string();
+        let schema_name = "foo_schema".to_string();
+        let table_name = "foo_table".to_string();
+        let schema = RegisterSchemaRequest {
+            catalog: catalog_name.clone(),
+            schema: schema_name.clone(),
+        };
+        let table = RegisterTableRequest {
+            catalog: catalog_name.clone(),
+            schema: schema_name.clone(),
+            table_name,
+            table_id: 0,
+            table: Arc::new(NumbersTable::default()),
+        };
+        catalog
+            .register_catalog(catalog_name.clone())
+            .await
+            .unwrap();
+        catalog.register_schema(schema).await.unwrap();
+        catalog.register_table(table).await.unwrap();
+
+        let request = DeregisterSchemaRequest {
+            catalog: catalog_name.clone(),
+            schema: schema_name.clone(),
+        };
+
+        assert!(catalog.deregister_schema(request).await.unwrap()); // the call must return true
+        assert!(!catalog // and the schema must no longer be reported as existing
+            .schema_exist(&catalog_name, &schema_name)
+            .await
+            .unwrap());
    }
 }
--- a/src/catalog/src/metrics.rs
+++ b/src/catalog/src/metrics.rs
@@ -0,0 +1,29 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use common_catalog::build_db_string;
+
+pub(crate) const METRIC_DB_LABEL: &str = "db";
+
+pub(crate) const METRIC_CATALOG_MANAGER_CATALOG_COUNT: &str = "catalog.catalog_count";
+pub(crate) const METRIC_CATALOG_MANAGER_SCHEMA_COUNT: &str = "catalog.schema_count";
+pub(crate) const METRIC_CATALOG_MANAGER_TABLE_COUNT: &str = "catalog.table_count";
+
+pub(crate) const METRIC_CATALOG_KV_REMOTE_GET: &str = "catalog.kv.get.remote";
+pub(crate) const METRIC_CATALOG_KV_GET: &str = "catalog.kv.get";
+
+#[inline]
+pub(crate) fn db_label(catalog: &str, schema: &str) -> (&'static str, String) {
+    (METRIC_DB_LABEL, build_db_string(catalog, schema))
+}
--- a/src/catalog/src/remote.rs
+++ b/src/catalog/src/remote.rs
@@ -12,120 +12,21 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::fmt::Debug;
-use std::pin::Pin;
 use std::sync::Arc;

-pub use client::MetaKvBackend;
-use futures::Stream;
-use futures_util::StreamExt;
-pub use manager::{RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider};
-
-use crate::error::Error;
+pub use client::{CachedMetaKvBackend, MetaKvBackend};
+pub use manager::RemoteCatalogManager;

 mod client;
 mod manager;

-#[derive(Debug, Clone)]
-pub struct Kv(pub Vec<u8>, pub Vec<u8>);
-
-pub type ValueIter<'a, E> = Pin<Box<dyn Stream<Item = Result<Kv, E>> + Send + 'a>>;
+#[cfg(feature = "testing")]
+pub mod mock;
+pub mod region_alive_keeper;

 #[async_trait::async_trait]
-pub trait KvBackend: Send + Sync {
-    fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
-    where
-        'a: 'b;
-
-    async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error>;
-
-    /// Compare and set value of key. `expect` is the expected value, if backend's current value associated
-    /// with key is the same as `expect`, the value will be updated to `val`.
-    ///
-    /// - If the compare-and-set operation successfully updated value, this method will return an `Ok(Ok())`
-    /// - If associated value is not the same as `expect`, no value will be updated and an `Ok(Err(Vec<u8>))`
-    /// will be returned, the `Err(Vec<u8>)` indicates the current associated value of key.
-    /// - If any error happens during operation, an `Err(Error)` will be returned.
-    async fn compare_and_set(
-        &self,
-        key: &[u8],
-        expect: &[u8],
-        val: &[u8],
-    ) -> Result<Result<(), Option<Vec<u8>>>, Error>;
-
-    async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error>;
-
-    async fn delete(&self, key: &[u8]) -> Result<(), Error> {
-        self.delete_range(key, &[]).await
-    }
-
-    /// Default get is implemented based on `range` method.
-    async fn get(&self, key: &[u8]) -> Result<Option<Kv>, Error> {
-        let mut iter = self.range(key);
-        while let Some(r) = iter.next().await {
-            let kv = r?;
-            if kv.0 == key {
-                return Ok(Some(kv));
-            }
-        }
-        return Ok(None);
-    }
+pub trait KvCacheInvalidator: Send + Sync {
+    async fn invalidate_key(&self, key: &[u8]);
 }

-pub type KvBackendRef = Arc<dyn KvBackend>;
-
-#[cfg(test)]
-mod tests {
-    use async_stream::stream;
-
-    use super::*;
-
-    struct MockKvBackend {}
-
-    #[async_trait::async_trait]
-    impl KvBackend for MockKvBackend {
-        fn range<'a, 'b>(&'a self, _key: &[u8]) -> ValueIter<'b, Error>
-        where
-            'a: 'b,
-        {
-            Box::pin(stream!({
-                for i in 0..3 {
-                    yield Ok(Kv(
-                        i.to_string().as_bytes().to_vec(),
-                        i.to_string().as_bytes().to_vec(),
-                    ))
-                }
-            }))
-        }
-
-        async fn set(&self, _key: &[u8], _val: &[u8]) -> Result<(), Error> {
-            unimplemented!()
-        }
-
-        async fn compare_and_set(
-            &self,
-            _key: &[u8],
-            _expect: &[u8],
-            _val: &[u8],
-        ) -> Result<Result<(), Option<Vec<u8>>>, Error> {
-            unimplemented!()
-        }
-
-        async fn delete_range(&self, _key: &[u8], _end: &[u8]) -> Result<(), Error> {
-            unimplemented!()
-        }
-    }
-
-    #[tokio::test]
-    async fn test_get() {
-        let backend = MockKvBackend {};
-        let result = backend.get(0.to_string().as_bytes()).await;
-        assert_eq!(0.to_string().as_bytes(), result.unwrap().unwrap().0);
-        let result = backend.get(1.to_string().as_bytes()).await;
-        assert_eq!(1.to_string().as_bytes(), result.unwrap().unwrap().0);
-        let result = backend.get(2.to_string().as_bytes()).await;
-        assert_eq!(2.to_string().as_bytes(), result.unwrap().unwrap().0);
-        let result = backend.get(3.to_string().as_bytes()).await;
-        assert!(result.unwrap().is_none());
-    }
-}
+pub type KvCacheInvalidatorRef = Arc<dyn KvCacheInvalidator>;
--- a/src/catalog/src/remote/client.rs
+++ b/src/catalog/src/remote/client.rs
@@ -12,97 +12,321 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::any::Any;
 use std::fmt::Debug;
 use std::sync::Arc;
+use std::time::Duration;

-use async_stream::stream;
-use common_telemetry::info;
+use common_error::ext::BoxedError;
+use common_meta::error::Error::{CacheNotGet, GetKvCache};
+use common_meta::error::{CacheNotGetSnafu, Error, MetaSrvSnafu, Result};
+use common_meta::kv_backend::{KvBackend, KvBackendRef, TxnService};
+use common_meta::rpc::store::{
+    BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
+    BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
+    DeleteRangeResponse, MoveValueRequest, MoveValueResponse, PutRequest, PutResponse,
+    RangeRequest, RangeResponse,
+};
+use common_meta::rpc::KeyValue;
+use common_telemetry::timer;
 use meta_client::client::MetaClient;
-use meta_client::rpc::{CompareAndPutRequest, DeleteRangeRequest, PutRequest, RangeRequest};
-use snafu::ResultExt;
+use moka::future::{Cache, CacheBuilder};
+use snafu::{OptionExt, ResultExt};
+
+use super::KvCacheInvalidator;
+use crate::metrics::{METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET};
+
+const CACHE_MAX_CAPACITY: u64 = 10000; // passed to `CacheBuilder::new` in `CachedMetaKvBackend::wrap`
+const CACHE_TTL_SECOND: u64 = 10 * 60; // cache time-to-live: 10 minutes, in seconds
+const CACHE_TTI_SECOND: u64 = 5 * 60; // cache time-to-idle: 5 minutes, in seconds
+
+pub type CacheBackendRef = Arc<Cache<Vec<u8>, KeyValue>>;
+
+pub struct CachedMetaKvBackend {
+    kv_backend: KvBackendRef,
+    cache: CacheBackendRef,
+    name: String,
+}
+
+impl TxnService for CachedMetaKvBackend {
+    type Error = Error;
+}
+
+#[async_trait::async_trait]
+impl KvBackend for CachedMetaKvBackend {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
+        self.kv_backend.range(req).await // forwarded as-is; the cache is neither read nor filled
+    }
+
+    async fn put(&self, req: PutRequest) -> Result<PutResponse> {
+        let key = &req.key.clone(); // copy the key first: `req` is moved into the backend call
+
+        let ret = self.kv_backend.put(req).await;
+
+        if ret.is_ok() { // drop the cached entry only when the backend accepted the write
+            self.invalidate_key(key).await;
+        }
+
+        ret
+    }
+
+    async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
+        let keys = req // collect owned keys up front: `req` is moved into the backend call
+            .kvs
+            .iter()
+            .map(|kv| kv.key().to_vec())
+            .collect::<Vec<_>>();
+
+        let resp = self.kv_backend.batch_put(req).await;
+
+        if resp.is_ok() { // invalidate every written key, but only after a successful write
+            for key in keys {
+                self.invalidate_key(&key).await;
+            }
+        }
+
+        resp
+    }
+
+    async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
+        self.kv_backend.batch_get(req).await // forwarded as-is; the cache is neither read nor filled
+    }
+
+    async fn compare_and_put(&self, req: CompareAndPutRequest) -> Result<CompareAndPutResponse> {
+        let key = &req.key.clone(); // copy the key first: `req` is moved into the backend call
+
+        let ret = self.kv_backend.compare_and_put(req).await;
+
+        if ret.is_ok() { // invalidates on any Ok response; the compare outcome is not inspected
+            self.invalidate_key(key).await;
+        }
+
+        ret
+    }
+
+    async fn delete_range(&self, mut req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
+        let prev_kv = req.prev_kv; // remember whether the caller asked for the previous KVs
+
+        req.prev_kv = true; // always request them: their keys are what gets invalidated below
+        let resp = self.kv_backend.delete_range(req).await;
+        match resp {
+            Ok(mut resp) => {
+                for prev_kv in resp.prev_kvs.iter() {
+                    self.invalidate_key(prev_kv.key()).await;
+                }
+
+                if !prev_kv { // the caller did not ask for previous KVs, so hand back an empty list
+                    resp.prev_kvs = vec![];
+                }
+                Ok(resp)
+            }
+            Err(e) => Err(e),
+        }
+    }
+
+    async fn batch_delete(&self, mut req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
+        let prev_kv = req.prev_kv; // remember whether the caller asked for the previous KVs
+
+        req.prev_kv = true; // always request them: their keys are what gets invalidated below
+        let resp = self.kv_backend.batch_delete(req).await;
+        match resp {
+            Ok(mut resp) => {
+                for prev_kv in resp.prev_kvs.iter() {
+                    self.invalidate_key(prev_kv.key()).await;
+                }
+
+                if !prev_kv { // the caller did not ask for previous KVs, so hand back an empty list
+                    resp.prev_kvs = vec![];
+                }
+                Ok(resp)
+            }
+            Err(e) => Err(e),
+        }
+    }
+
+    async fn move_value(&self, req: MoveValueRequest) -> Result<MoveValueResponse> {
+        let from_key = &req.from_key.clone(); // copy both keys first: `req` is moved below
+        let to_key = &req.to_key.clone();
+
+        let ret = self.kv_backend.move_value(req).await;
+
+        if ret.is_ok() { // on success, drop the cached entries for both source and destination
+            self.invalidate_key(from_key).await;
+            self.invalidate_key(to_key).await;
+        }
+
+        ret
+    }
+
+    async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
+        let _timer = timer!(METRIC_CATALOG_KV_GET);
+
+        let init = async { // loader future handed to the cache below; it queries the wrapped backend
+            let _timer = timer!(METRIC_CATALOG_KV_REMOTE_GET);
+            self.kv_backend.get(key).await.map(|val| {
+                val.with_context(|| CacheNotGetSnafu { // a `None` from the backend becomes `CacheNotGet`
+                    key: String::from_utf8_lossy(key),
+                })
+            })?
+        };
+
+        // moka has no `optionally_try_get_with_by_ref` yet, so a missing key is reported by
+        // `init` as a `CacheNotGet` error and mapped back to `Ok(None)` in the match below.
+        // TODO(fys): use moka's method once available, https://github.com/moka-rs/moka/issues/254
+        match self.cache.try_get_with_by_ref(key, init).await {
+            Ok(val) => Ok(Some(val)),
+            Err(e) => match e.as_ref() {
+                CacheNotGet { .. } => Ok(None), // sentinel from `init`: the key does not exist
+                _ => Err(e),
+            },
+        }
+        .map_err(|e| GetKvCache { // any other failure is reported as `GetKvCache` with its message
+            err_msg: e.to_string(),
+        })
+    }
+}
+
+#[async_trait::async_trait]
+impl KvCacheInvalidator for CachedMetaKvBackend {
+    async fn invalidate_key(&self, key: &[u8]) {
+        self.cache.invalidate(key).await
+    }
+}
+
+impl CachedMetaKvBackend {
+    pub fn new(client: Arc<MetaClient>) -> Self {
+        let kv_backend = Arc::new(MetaKvBackend { client });
+        Self::wrap(kv_backend)
+    }
+
+    pub fn wrap(kv_backend: KvBackendRef) -> Self { // puts a freshly built cache in front of `kv_backend`
+        let cache = Arc::new(
+            CacheBuilder::new(CACHE_MAX_CAPACITY)
+                .time_to_live(Duration::from_secs(CACHE_TTL_SECOND))
+                .time_to_idle(Duration::from_secs(CACHE_TTI_SECOND))
+                .build(),
+        );
+
+        let name = format!("CachedKvBackend({})", kv_backend.name()); // embeds the wrapped backend's name
+        Self {
+            kv_backend,
+            cache,
+            name,
+        }
+    }
+
+    pub fn cache(&self) -> &CacheBackendRef {
+        &self.cache
+    }
+}

-use crate::error::{Error, MetaSrvSnafu};
-use crate::remote::{Kv, KvBackend, ValueIter};
 #[derive(Debug)]
 pub struct MetaKvBackend {
    pub client: Arc<MetaClient>,
 }

+impl TxnService for MetaKvBackend {
+    type Error = Error;
+}
+
 /// Implement `KvBackend` trait for `MetaKvBackend` instead of opendal's `Accessor` since
 /// `MetaClient`'s range method can return both keys and values, which can reduce IO overhead
 /// comparing to `Accessor`'s list and get method.
 #[async_trait::async_trait]
 impl KvBackend for MetaKvBackend {
-    fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
-    where
-        'a: 'b,
-    {
-        let key = key.to_vec();
-        Box::pin(stream!({
-            let mut resp = self
-                .client
-                .range(RangeRequest::new().with_prefix(key))
-                .await
-                .context(MetaSrvSnafu)?;
-            let kvs = resp.take_kvs();
-            for mut kv in kvs.into_iter() {
-                yield Ok(Kv(kv.take_key(), kv.take_value()))
-            }
-        }))
+    fn name(&self) -> &str {
+        "MetaKvBackend"
    }

-    async fn get(&self, key: &[u8]) -> Result<Option<Kv>, Error> {
+    async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
+        self.client
+            .range(req)
+            .await
+            .map_err(BoxedError::new)
+            .context(MetaSrvSnafu)
+    }
+
+    async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
        let mut response = self
            .client
            .range(RangeRequest::new().with_key(key))
            .await
+            .map_err(BoxedError::new)
            .context(MetaSrvSnafu)?;
-        Ok(response
-            .take_kvs()
-            .get_mut(0)
-            .map(|kv| Kv(kv.take_key(), kv.take_value())))
+        Ok(response.take_kvs().get_mut(0).map(|kv| KeyValue {
+            key: kv.take_key(),
+            value: kv.take_value(),
+        }))
    }

-    async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error> {
-        let req = PutRequest::new()
-            .with_key(key.to_vec())
-            .with_value(val.to_vec());
-        let _ = self.client.put(req).await.context(MetaSrvSnafu)?;
-        Ok(())
+    async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
+        self.client
+            .batch_put(req)
+            .await
+            .map_err(BoxedError::new)
+            .context(MetaSrvSnafu)
    }

-    async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
-        let req = DeleteRangeRequest::new().with_range(key.to_vec(), end.to_vec());
-        let resp = self.client.delete_range(req).await.context(MetaSrvSnafu)?;
-        info!(
-            "Delete range, key: {}, end: {}, deleted: {}",
-            String::from_utf8_lossy(key),
-            String::from_utf8_lossy(end),
-            resp.deleted()
-        );
-
-        Ok(())
+    async fn put(&self, req: PutRequest) -> Result<PutResponse> {
+        self.client
+            .put(req)
+            .await
+            .map_err(BoxedError::new)
+            .context(MetaSrvSnafu)
    }

-    async fn compare_and_set(
+    async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
+        self.client
+            .delete_range(req)
+            .await
+            .map_err(BoxedError::new)
+            .context(MetaSrvSnafu)
+    }
+
+    async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
+        self.client
+            .batch_delete(req)
+            .await
+            .map_err(BoxedError::new)
+            .context(MetaSrvSnafu)
+    }
+
+    async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
+        self.client
+            .batch_get(req)
+            .await
+            .map_err(BoxedError::new)
+            .context(MetaSrvSnafu)
+    }
+
+    async fn compare_and_put(
        &self,
-        key: &[u8],
-        expect: &[u8],
-        val: &[u8],
-    ) -> Result<Result<(), Option<Vec<u8>>>, Error> {
-        let request = CompareAndPutRequest::new()
-            .with_key(key.to_vec())
-            .with_expect(expect.to_vec())
-            .with_value(val.to_vec());
-        let mut response = self
-            .client
+        request: CompareAndPutRequest,
+    ) -> Result<CompareAndPutResponse> {
+        self.client
            .compare_and_put(request)
            .await
-            .context(MetaSrvSnafu)?;
-        if response.is_success() {
-            Ok(Ok(()))
-        } else {
-            Ok(Err(response.take_prev_kv().map(|v| v.value().to_vec())))
-        }
+            .map_err(BoxedError::new)
+            .context(MetaSrvSnafu)
+    }
+
+    async fn move_value(&self, req: MoveValueRequest) -> Result<MoveValueResponse> {
+        self.client
+            .move_value(req)
+            .await
+            .map_err(BoxedError::new)
+            .context(MetaSrvSnafu)
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
    }
 }
--- a/src/catalog/src/remote/manager.rs
+++ b/src/catalog/src/remote/manager.rs
--- a/src/catalog/src/remote/mock.rs
+++ b/src/catalog/src/remote/mock.rs
@@ -0,0 +1,128 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+use std::sync::{Arc, RwLock as StdRwLock};
+
+use common_recordbatch::RecordBatch;
+use datatypes::data_type::ConcreteDataType;
+use datatypes::schema::{ColumnSchema, Schema};
+use datatypes::vectors::StringVector;
+use table::engine::{CloseTableResult, EngineContext, TableEngine};
+use table::metadata::TableId;
+use table::requests::{
+    AlterTableRequest, CloseTableRequest, CreateTableRequest, DropTableRequest, OpenTableRequest,
+    TruncateTableRequest,
+};
+use table::test_util::MemTable;
+use table::TableRef;
+
+#[derive(Default)]
+pub struct MockTableEngine {
+    tables: StdRwLock<HashMap<TableId, TableRef>>,
+}
+
+#[async_trait::async_trait]
+impl TableEngine for MockTableEngine {
+    fn name(&self) -> &str {
+        "MockTableEngine"
+    }
+
+    /// Create a table with only one column
+    async fn create_table(
+        &self,
+        _ctx: &EngineContext,
+        request: CreateTableRequest,
+    ) -> table::Result<TableRef> {
+        let table_id = request.id;
+
+        let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
+            "name",
+            ConcreteDataType::string_datatype(),
+            true,
+        )]));
+
+        let data = vec![Arc::new(StringVector::from(vec!["a", "b", "c"])) as _];
+        let record_batch = RecordBatch::new(schema, data).unwrap();
+        let table: TableRef = Arc::new(MemTable::new_with_catalog(
+            &request.table_name,
+            record_batch,
+            table_id,
+            request.catalog_name,
+            request.schema_name,
+            vec![0],
+        )) as Arc<_>;
+
+        let mut tables = self.tables.write().unwrap();
+        let _ = tables.insert(table_id, table.clone() as TableRef);
+        Ok(table)
+    }
+
+    async fn open_table(
+        &self,
+        _ctx: &EngineContext,
+        request: OpenTableRequest,
+    ) -> table::Result<Option<TableRef>> {
+        Ok(self.tables.read().unwrap().get(&request.table_id).cloned())
+    }
+
+    async fn alter_table(
+        &self,
+        _ctx: &EngineContext,
+        _request: AlterTableRequest,
+    ) -> table::Result<TableRef> {
+        unimplemented!()
+    }
+
+    fn get_table(
+        &self,
+        _ctx: &EngineContext,
+        table_id: TableId,
+    ) -> table::Result<Option<TableRef>> {
+        Ok(self.tables.read().unwrap().get(&table_id).cloned())
+    }
+
+    fn table_exists(&self, _ctx: &EngineContext, table_id: TableId) -> bool {
+        self.tables.read().unwrap().contains_key(&table_id)
+    }
+
+    async fn drop_table(
+        &self,
+        _ctx: &EngineContext,
+        _request: DropTableRequest,
+    ) -> table::Result<bool> {
+        unimplemented!()
+    }
+
+    async fn close_table(
+        &self,
+        _ctx: &EngineContext,
+        request: CloseTableRequest,
+    ) -> table::Result<CloseTableResult> {
+        let _ = self.tables.write().unwrap().remove(&request.table_id);
+        Ok(CloseTableResult::Released(vec![]))
+    }
+
+    async fn close(&self) -> table::Result<()> {
+        Ok(())
+    }
+
+    async fn truncate_table(
+        &self,
+        _ctx: &EngineContext,
+        _request: TruncateTableRequest,
+    ) -> table::Result<bool> {
+        Ok(true)
+    }
+}
--- a/src/catalog/src/remote/region_alive_keeper.rs
+++ b/src/catalog/src/remote/region_alive_keeper.rs
@@ -0,0 +1,826 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+use std::future::Future;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use common_meta::error::InvalidProtoMsgSnafu;
+use common_meta::heartbeat::handler::{
+    HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
+};
+use common_meta::ident::TableIdent;
+use common_meta::RegionIdent;
+use common_telemetry::{debug, error, info, warn};
+use snafu::{OptionExt, ResultExt};
+use store_api::storage::RegionNumber;
+use table::engine::manager::TableEngineManagerRef;
+use table::engine::{CloseTableResult, EngineContext, TableEngineRef};
+use table::requests::CloseTableRequest;
+use table::TableRef;
+use tokio::sync::{mpsc, oneshot, Mutex};
+use tokio::task::JoinHandle;
+use tokio::time::{Duration, Instant};
+
+use crate::error::{Result, TableEngineNotFoundSnafu};
+
+/// [RegionAliveKeepers] manages all [RegionAliveKeeper] in a scope of tables.
+pub struct RegionAliveKeepers {
+    table_engine_manager: TableEngineManagerRef,
+    keepers: Arc<Mutex<HashMap<TableIdent, Arc<RegionAliveKeeper>>>>,
+    heartbeat_interval_millis: u64,
+    started: AtomicBool,
+
+    /// The epoch when [RegionAliveKeepers] is created. It's used to get a monotonically non-decreasing
+    /// elapsed time when submitting heartbeats to Metasrv (because [Instant] is monotonically
+    /// non-decreasing). The heartbeat request will carry the duration since this epoch, and the
+    /// duration acts like an "invariant point" for region's keep alive lease.
+    epoch: Instant,
+}
+
+impl RegionAliveKeepers {
+    pub fn new(
+        table_engine_manager: TableEngineManagerRef,
+        heartbeat_interval_millis: u64,
+    ) -> Self {
+        Self {
+            table_engine_manager,
+            keepers: Arc::new(Mutex::new(HashMap::new())),
+            heartbeat_interval_millis,
+            started: AtomicBool::new(false),
+            epoch: Instant::now(),
+        }
+    }
+
+    pub async fn find_keeper(&self, table_ident: &TableIdent) -> Option<Arc<RegionAliveKeeper>> {
+        self.keepers.lock().await.get(table_ident).cloned()
+    }
+
+    pub async fn register_table(&self, table_ident: TableIdent, table: TableRef) -> Result<()> {
+        // Hold the map lock from the existence check to the insert, so two concurrent
+        // registrations of the same table cannot both pass the check and replace each other.
+        let mut keepers = self.keepers.lock().await;
+        if keepers.contains_key(&table_ident) {
+            return Ok(());
+        }
+
+        let table_engine = self
+            .table_engine_manager
+            .engine(&table_ident.engine)
+            .context(TableEngineNotFoundSnafu {
+                engine_name: &table_ident.engine,
+            })?;
+
+        let keeper = Arc::new(RegionAliveKeeper::new(
+            table_engine,
+            table_ident.clone(),
+            self.heartbeat_interval_millis,
+        ));
+        for r in table.table_info().meta.region_numbers.iter() {
+            keeper.register_region(*r).await;
+        }
+
+        let _ = keepers.insert(table_ident.clone(), keeper.clone());
+
+        if self.started.load(Ordering::Relaxed) {
+            keeper.start().await;
+            info!("RegionAliveKeeper for table {table_ident} is started!");
+        } else {
+            info!("RegionAliveKeeper for table {table_ident} is registered but not started yet!");
+        }
+        Ok(())
+    }
+
+    pub async fn deregister_table(
+        &self,
+        table_ident: &TableIdent,
+    ) -> Option<Arc<RegionAliveKeeper>> {
+        self.keepers.lock().await.remove(table_ident).map(|x| {
+            info!("Deregister RegionAliveKeeper for table {table_ident}");
+            x
+        })
+    }
+
+    pub async fn register_region(&self, region_ident: &RegionIdent) {
+        let table_ident = &region_ident.table_ident;
+        let Some(keeper) = self.find_keeper(table_ident).await else {
+            // Alive keeper could be affected by lagging msg, just warn and ignore.
+            warn!("Alive keeper for region {region_ident} is not found!");
+            return;
+        };
+        keeper.register_region(region_ident.region_number).await
+    }
+
+    pub async fn deregister_region(&self, region_ident: &RegionIdent) {
+        let table_ident = &region_ident.table_ident;
+        let Some(keeper) = self.find_keeper(table_ident).await else {
+            // Alive keeper could be affected by lagging msg, just warn and ignore.
+            warn!("Alive keeper for region {region_ident} is not found!");
+            return;
+        };
+        let _ = keeper.deregister_region(region_ident.region_number).await;
+    }
+
+    pub async fn start(&self) {
+        let keepers = self.keepers.lock().await;
+        for keeper in keepers.values() {
+            keeper.start().await;
+        }
+        self.started.store(true, Ordering::Relaxed);
+
+        info!(
+            "RegionAliveKeepers for tables {:?} are started!",
+            keepers.keys().map(|x| x.to_string()).collect::<Vec<_>>(),
+        );
+    }
+
+    pub fn epoch(&self) -> Instant {
+        self.epoch
+    }
+}
+
+#[async_trait]
+impl HeartbeatResponseHandler for RegionAliveKeepers {
+    fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool {
+        !ctx.response.region_leases.is_empty()
+    }
+
+    async fn handle(
+        &self,
+        ctx: &mut HeartbeatResponseHandlerContext,
+    ) -> common_meta::error::Result<HandleControl> {
+        // Consume the leases directly from the response; no intermediate `Vec` is needed.
+        for lease in ctx.response.region_leases.drain(..) {
+            let table_ident: TableIdent = match lease
+                .table_ident
+                .context(InvalidProtoMsgSnafu {
+                    err_msg: "'table_ident' is missing in RegionLease",
+                })
+                .and_then(|x| x.try_into())
+            {
+                Ok(x) => x,
+                Err(e) => {
+                    error!(e; "Failed to resolve table ident of RegionLease, ignoring the lease");
+                    continue;
+                }
+            };
+
+            let Some(keeper) = self.keepers.lock().await.get(&table_ident).cloned() else {
+                // Alive keeper could be affected by lagging msg, just warn and ignore.
+                warn!("Alive keeper for table {table_ident} is not found!");
+                continue;
+            };
+
+            let start_instant = self.epoch + Duration::from_millis(lease.duration_since_epoch);
+            let deadline = start_instant + Duration::from_secs(lease.lease_seconds);
+            keeper.keep_lived(lease.regions, deadline).await;
+        }
+        Ok(HandleControl::Continue)
+    }
+}
+
+/// [RegionAliveKeeper] runs a countdown for each region of a table. When a region's deadline is
+/// reached, that region is closed.
+/// The deadline is controlled by Metasrv and works like a "lease" for regions: a Datanode reports
+/// its opened regions to Metasrv in heartbeats. If Metasrv decides a region may keep residing on
+/// this Datanode, it "extends" the region's "lease" by sending a new deadline for
+/// [RegionAliveKeeper] to count down to.
+pub struct RegionAliveKeeper {
+    table_engine: TableEngineRef,
+    table_ident: TableIdent,
+    countdown_task_handles: Arc<Mutex<HashMap<RegionNumber, Arc<CountdownTaskHandle>>>>,
+    heartbeat_interval_millis: u64,
+    started: AtomicBool,
+}
+
+impl RegionAliveKeeper {
+    fn new(
+        table_engine: TableEngineRef,
+        table_ident: TableIdent,
+        heartbeat_interval_millis: u64,
+    ) -> Self {
+        Self {
+            table_engine,
+            table_ident,
+            countdown_task_handles: Arc::new(Mutex::new(HashMap::new())),
+            heartbeat_interval_millis,
+            started: AtomicBool::new(false),
+        }
+    }
+
+    async fn find_handle(&self, region: &RegionNumber) -> Option<Arc<CountdownTaskHandle>> {
+        self.countdown_task_handles
+            .lock()
+            .await
+            .get(region)
+            .cloned()
+    }
+
+    async fn register_region(&self, region: RegionNumber) {
+        // Check and insert under one lock so concurrent registrations cannot race each other.
+        let mut handles = self.countdown_task_handles.lock().await;
+        if handles.contains_key(&region) {
+            return;
+        }
+
+        let countdown_task_handles = Arc::downgrade(&self.countdown_task_handles);
+        let on_task_finished = async move {
+            if let Some(x) = countdown_task_handles.upgrade() {
+                let _ = x.lock().await.remove(&region);
+            } // Else the countdown task handles map could be dropped because the keeper is dropped.
+        };
+        let handle = Arc::new(CountdownTaskHandle::new(
+            self.table_engine.clone(),
+            self.table_ident.clone(),
+            region,
+            || on_task_finished,
+        ));
+        let _ = handles.insert(region, handle.clone());
+
+        if self.started.load(Ordering::Relaxed) {
+            handle.start(self.heartbeat_interval_millis).await;
+
+            info!(
+                "Region alive countdown for region {region} in table {} is started!",
+                self.table_ident
+            );
+        } else {
+            info!(
+                "Region alive countdown for region {region} in table {} is registered but not started yet!",
+                self.table_ident
+            );
+        }
+    }
+
+    async fn deregister_region(&self, region: RegionNumber) -> Option<Arc<CountdownTaskHandle>> {
+        self.countdown_task_handles
+            .lock()
+            .await
+            .remove(&region)
+            .map(|x| {
+                info!(
+                    "Deregister alive countdown for region {region} in table {}",
+                    self.table_ident
+                );
+                x
+            })
+    }
+
+    async fn start(&self) {
+        let handles = self.countdown_task_handles.lock().await;
+        for handle in handles.values() {
+            handle.start(self.heartbeat_interval_millis).await;
+        }
+
+        self.started.store(true, Ordering::Relaxed);
+        info!(
+            "Region alive countdowns for regions {:?} in table {} are started!",
+            handles.keys().copied().collect::<Vec<_>>(),
+            self.table_ident
+        );
+    }
+
+    async fn keep_lived(&self, designated_regions: Vec<RegionNumber>, deadline: Instant) {
+        for region in designated_regions {
+            if let Some(handle) = self.find_handle(&region).await {
+                handle.reset_deadline(deadline).await;
+            }
+            // Else the region alive keeper might be triggered by lagging messages, we can safely ignore it.
+        }
+    }
+
+    pub async fn deadline(&self, region: RegionNumber) -> Option<Instant> {
+        let mut deadline = None;
+        if let Some(handle) = self.find_handle(&region).await {
+            let (s, r) = oneshot::channel();
+            if handle.tx.send(CountdownCommand::Deadline(s)).await.is_ok() {
+                deadline = r.await.ok()
+            }
+        }
+        deadline
+    }
+
+    pub fn table_ident(&self) -> &TableIdent {
+        &self.table_ident
+    }
+}
+
+#[derive(Debug)]
+enum CountdownCommand {
+    Start(u64),
+    Reset(Instant),
+    Deadline(oneshot::Sender<Instant>),
+}
+
+struct CountdownTaskHandle {
+    tx: mpsc::Sender<CountdownCommand>,
+    handler: JoinHandle<()>,
+    table_ident: TableIdent,
+    region: RegionNumber,
+}
+
+impl CountdownTaskHandle {
+    /// Creates a new [CountdownTaskHandle] and starts the countdown task.
+    /// # Params
+    /// - `on_task_finished`: a callback to be invoked when the task is finished. Note that it will not
+    ///   be invoked if the task is cancelled (by dropping the handle). This is because we want something
+    ///   meaningful to be done when the task is finished, e.g. deregister the handle from the map.
+    ///   While dropping the handle does not necessarily mean the task is finished.
+    fn new<Fut>(
+        table_engine: TableEngineRef,
+        table_ident: TableIdent,
+        region: RegionNumber,
+        on_task_finished: impl FnOnce() -> Fut + Send + 'static,
+    ) -> Self
+    where
+        Fut: Future<Output = ()> + Send,
+    {
+        let (tx, rx) = mpsc::channel(1024);
+
+        let mut countdown_task = CountdownTask {
+            table_engine,
+            table_ident: table_ident.clone(),
+            region,
+            rx,
+        };
+        let handler = common_runtime::spawn_bg(async move {
+            countdown_task.run().await;
+            on_task_finished().await;
+        });
+
+        Self {
+            tx,
+            handler,
+            table_ident,
+            region,
+        }
+    }
+
+    async fn start(&self, heartbeat_interval_millis: u64) {
+        if let Err(e) = self
+            .tx
+            .send(CountdownCommand::Start(heartbeat_interval_millis))
+            .await
+        {
+            warn!(
+                "Failed to start region alive keeper countdown: {e}. \
+                Maybe the task is stopped due to region been closed."
+            );
+        }
+    }
+
+    async fn reset_deadline(&self, deadline: Instant) {
+        if let Err(e) = self.tx.send(CountdownCommand::Reset(deadline)).await {
+            warn!(
+                "Failed to reset region alive keeper deadline: {e}. \
+                Maybe the task is stopped due to region been closed."
+            );
+        }
+    }
+}
+
+impl Drop for CountdownTaskHandle {
+    fn drop(&mut self) {
+        debug!(
+            "Aborting region alive countdown task for region {} in table {}",
+            self.region, self.table_ident,
+        );
+        self.handler.abort();
+    }
+}
+
+struct CountdownTask {
+    table_engine: TableEngineRef,
+    table_ident: TableIdent,
+    region: RegionNumber,
+    rx: mpsc::Receiver<CountdownCommand>,
+}
+
+impl CountdownTask {
+    async fn run(&mut self) {
+        // Roughly 30 years from now. See `Instant::far_future`.
+        let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 30);
+
+        // Make sure the alive countdown is not gonna happen before heartbeat task is started (the
+        // "start countdown" command will be sent from heartbeat task).
+        let countdown = tokio::time::sleep_until(far_future);
+        tokio::pin!(countdown);
+
+        let region = &self.region;
+        let table_ident = &self.table_ident;
+        loop {
+            tokio::select! {
+                command = self.rx.recv() => {
+                    match command {
+                        Some(CountdownCommand::Start(heartbeat_interval_millis)) => {
+                            // Set the first deadline 4 heartbeats from now (roughly 20 seconds if the heartbeat
+                            // interval is the default 5 seconds), to make Datanode and Metasrv more tolerant of
+                            // network or other jitters during startup.
+                            let first_deadline = Instant::now() + Duration::from_millis(heartbeat_interval_millis) * 4;
+                            countdown.set(tokio::time::sleep_until(first_deadline));
+                        },
+                        Some(CountdownCommand::Reset(deadline)) => {
+                            if countdown.deadline() < deadline {
+                                debug!(
+                                    "Reset deadline of region {region} of table {table_ident} to approximately {} seconds later",
+                                    (deadline - Instant::now()).as_secs_f32(),
+                                );
+                                countdown.set(tokio::time::sleep_until(deadline));
+                            }
+                            // Else the countdown could be either:
+                            // - not started yet;
+                            // - during startup protection;
+                            // - received a lagging heartbeat message.
+                            // All can be safely ignored.
+                        },
+                        None => {
+                            info!(
+                                "The handle of countdown task for region {region} of table {table_ident} \
+                                is dropped, RegionAliveKeeper out."
+                            );
+                            break;
+                        },
+                        Some(CountdownCommand::Deadline(tx)) => {
+                            let _ = tx.send(countdown.deadline());
+                        }
+                    }
+                }
+                () = &mut countdown => {
+                    let result = self.close_region().await;
+                    warn!(
+                        "Region {region} of table {table_ident} is closed, result: {result:?}. \
+                        RegionAliveKeeper out.",
+                    );
+                    break;
+                }
+            }
+        }
+    }
+
+    async fn close_region(&self) -> CloseTableResult {
+        let ctx = EngineContext::default();
+        let region = self.region;
+        let table_ident = &self.table_ident;
+        loop {
+            let request = CloseTableRequest {
+                catalog_name: table_ident.catalog.clone(),
+                schema_name: table_ident.schema.clone(),
+                table_name: table_ident.table.clone(),
+                table_id: table_ident.table_id,
+                region_numbers: vec![region],
+                flush: true,
+            };
+            match self.table_engine.close_table(&ctx, request).await {
+                Ok(result) => return result,
+                // Closing failed: retry at once, with no backoff. Maybe we should panic instead?
+                Err(e) => error!(e;
+                    "Failed to close region {region} of table {table_ident}. \
+                    For the integrity of data, retry closing and retry without wait.",
+                ),
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::sync::atomic::{AtomicBool, Ordering};
+    use std::sync::Arc;
+
+    use api::v1::meta::{HeartbeatResponse, RegionLease};
+    use common_meta::heartbeat::mailbox::HeartbeatMailbox;
+    use datatypes::schema::RawSchema;
+    use table::engine::manager::MemoryTableEngineManager;
+    use table::engine::TableEngine;
+    use table::requests::{CreateTableRequest, TableOptions};
+    use table::test_util::EmptyTable;
+
+    use super::*;
+    use crate::remote::mock::MockTableEngine;
+
+    async fn prepare_keepers() -> (TableIdent, RegionAliveKeepers) {
+        let table_engine = Arc::new(MockTableEngine::default());
+        let table_engine_manager = Arc::new(MemoryTableEngineManager::new(table_engine));
+        let keepers = RegionAliveKeepers::new(table_engine_manager, 5000);
+
+        let catalog = "my_catalog";
+        let schema = "my_schema";
+        let table = "my_table";
+        let table_ident = TableIdent {
+            catalog: catalog.to_string(),
+            schema: schema.to_string(),
+            table: table.to_string(),
+            table_id: 1,
+            engine: "MockTableEngine".to_string(),
+        };
+        let table = Arc::new(EmptyTable::new(CreateTableRequest {
+            id: 1,
+            catalog_name: catalog.to_string(),
+            schema_name: schema.to_string(),
+            table_name: table.to_string(),
+            desc: None,
+            schema: RawSchema {
+                column_schemas: vec![],
+                timestamp_index: None,
+                version: 0,
+            },
+            region_numbers: vec![1, 2, 3],
+            primary_key_indices: vec![],
+            create_if_not_exists: false,
+            table_options: TableOptions::default(),
+            engine: "MockTableEngine".to_string(),
+        }));
+        keepers
+            .register_table(table_ident.clone(), table)
+            .await
+            .unwrap();
+        assert!(keepers.keepers.lock().await.contains_key(&table_ident));
+
+        (table_ident, keepers)
+    }
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_handle_heartbeat_response() {
+        let (table_ident, keepers) = prepare_keepers().await;
+
+        keepers.start().await;
+        let startup_protection_until = Instant::now() + Duration::from_secs(21);
+
+        let duration_since_epoch = (Instant::now() - keepers.epoch).as_millis() as _;
+        let lease_seconds = 100;
+        let response = HeartbeatResponse {
+            region_leases: vec![RegionLease {
+                table_ident: Some(table_ident.clone().into()),
+                regions: vec![1, 3], // Not extending region 2's lease time.
+                duration_since_epoch,
+                lease_seconds,
+            }],
+            ..Default::default()
+        };
+        let keep_alive_until = keepers.epoch
+            + Duration::from_millis(duration_since_epoch)
+            + Duration::from_secs(lease_seconds);
+
+        let (tx, _) = mpsc::channel(8);
+        let mailbox = Arc::new(HeartbeatMailbox::new(tx));
+        let mut ctx = HeartbeatResponseHandlerContext::new(mailbox, response);
+
+        assert!(keepers.handle(&mut ctx).await.unwrap() == HandleControl::Continue);
+
+        // Sleep so the countdown tasks can process the `Reset` commands sent by `handle`.
+        tokio::time::sleep(Duration::from_secs(1)).await;
+
+        async fn test(
+            keeper: &Arc<RegionAliveKeeper>,
+            region_number: RegionNumber,
+            startup_protection_until: Instant,
+            keep_alive_until: Instant,
+            is_kept_live: bool,
+        ) {
+            let deadline = keeper.deadline(region_number).await.unwrap();
+            if is_kept_live {
+                assert!(deadline > startup_protection_until && deadline == keep_alive_until);
+            } else {
+                assert!(deadline <= startup_protection_until);
+            }
+        }
+
+        let keeper = &keepers
+            .keepers
+            .lock()
+            .await
+            .get(&table_ident)
+            .cloned()
+            .unwrap();
+
+        // Regions 1 and 3 are kept alive: their deadlines are updated to the desired instant.
+        test(keeper, 1, startup_protection_until, keep_alive_until, true).await;
+        test(keeper, 3, startup_protection_until, keep_alive_until, true).await;
+
+        // Region 2 is not kept alive. Its deadline is not updated: still within the startup protection period.
+        test(keeper, 2, startup_protection_until, keep_alive_until, false).await;
+    }
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_region_alive_keepers() {
+        let (table_ident, keepers) = prepare_keepers().await;
+
+        keepers
+            .register_region(&RegionIdent {
+                cluster_id: 1,
+                datanode_id: 1,
+                table_ident: table_ident.clone(),
+                region_number: 4,
+            })
+            .await;
+
+        keepers.start().await;
+        for keeper in keepers.keepers.lock().await.values() {
+            let regions = {
+                let handles = keeper.countdown_task_handles.lock().await;
+                handles.keys().copied().collect::<Vec<_>>()
+            };
+            for region in regions {
+                // assert countdown tasks are started
+                let deadline = keeper.deadline(region).await.unwrap();
+                assert!(deadline <= Instant::now() + Duration::from_secs(20));
+            }
+        }
+
+        keepers
+            .deregister_region(&RegionIdent {
+                cluster_id: 1,
+                datanode_id: 1,
+                table_ident: table_ident.clone(),
+                region_number: 1,
+            })
+            .await;
+        let mut regions = keepers
+            .find_keeper(&table_ident)
+            .await
+            .unwrap()
+            .countdown_task_handles
+            .lock()
+            .await
+            .keys()
+            .copied()
+            .collect::<Vec<_>>();
+        regions.sort();
+        assert_eq!(regions, vec![2, 3, 4]);
+
+        let keeper = keepers.deregister_table(&table_ident).await.unwrap();
+        assert!(Arc::try_unwrap(keeper).is_ok(), "keeper is not dropped");
+        assert!(keepers.keepers.lock().await.is_empty());
+    }
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_region_alive_keeper() {
+        let table_engine = Arc::new(MockTableEngine::default());
+        let table_ident = TableIdent {
+            catalog: "my_catalog".to_string(),
+            schema: "my_schema".to_string(),
+            table: "my_table".to_string(),
+            table_id: 1024,
+            engine: "mito".to_string(),
+        };
+        let keeper = RegionAliveKeeper::new(table_engine, table_ident, 1000);
+
+        let region = 1;
+        assert!(keeper.find_handle(&region).await.is_none());
+        keeper.register_region(region).await;
+        let _ = keeper.find_handle(&region).await.unwrap();
+
+        let ten_seconds_later = || Instant::now() + Duration::from_secs(10);
+
+        keeper.keep_lived(vec![1, 2, 3], ten_seconds_later()).await;
+        assert!(keeper.find_handle(&2).await.is_none());
+        assert!(keeper.find_handle(&3).await.is_none());
+
+        let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 29);
+        // assert if keeper is not started, keep_lived is of no use
+        assert!(keeper.deadline(region).await.unwrap() > far_future);
+
+        keeper.start().await;
+        keeper.keep_lived(vec![1, 2, 3], ten_seconds_later()).await;
+        // assert keep_lived works if keeper is started
+        assert!(keeper.deadline(region).await.unwrap() <= ten_seconds_later());
+
+        let handle = keeper.deregister_region(region).await.unwrap();
+        assert!(Arc::try_unwrap(handle).is_ok(), "handle is not dropped");
+        assert!(keeper.find_handle(&region).await.is_none());
+    }
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_countdown_task_handle() {
+        let table_engine = Arc::new(MockTableEngine::default());
+        let table_ident = TableIdent {
+            catalog: "my_catalog".to_string(),
+            schema: "my_schema".to_string(),
+            table: "my_table".to_string(),
+            table_id: 1024,
+            engine: "mito".to_string(),
+        };
+        let finished = Arc::new(AtomicBool::new(false));
+        let finished_clone = finished.clone();
+        let handle = CountdownTaskHandle::new(
+            table_engine.clone(),
+            table_ident.clone(),
+            1,
+            || async move { finished_clone.store(true, Ordering::Relaxed) },
+        );
+        let tx = handle.tx.clone();
+
+        // assert countdown task is running
+        tx.send(CountdownCommand::Start(5000)).await.unwrap();
+        assert!(!finished.load(Ordering::Relaxed));
+
+        drop(handle);
+        tokio::time::sleep(Duration::from_secs(1)).await;
+
+        // assert countdown task is stopped
+        assert!(tx
+            .try_send(CountdownCommand::Reset(
+                Instant::now() + Duration::from_secs(10)
+            ))
+            .is_err());
+        // assert `on_task_finished` is not called (because the task is aborted by the handle's drop)
+        assert!(!finished.load(Ordering::Relaxed));
+
+        let finished = Arc::new(AtomicBool::new(false));
+        let finished_clone = finished.clone();
+        let handle = CountdownTaskHandle::new(table_engine, table_ident, 1, || async move {
+            finished_clone.store(true, Ordering::Relaxed)
+        });
+        handle.tx.send(CountdownCommand::Start(100)).await.unwrap();
+        tokio::time::sleep(Duration::from_secs(1)).await;
+        // assert `on_task_finished` is called when task is finished normally
+        assert!(finished.load(Ordering::Relaxed));
+    }
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_countdown_task_run() {
+        let ctx = &EngineContext::default();
+        let catalog = "my_catalog";
+        let schema = "my_schema";
+        let table = "my_table";
+        let table_id = 1;
+        let request = CreateTableRequest {
+            id: table_id,
+            catalog_name: catalog.to_string(),
+            schema_name: schema.to_string(),
+            table_name: table.to_string(),
+            desc: None,
+            schema: RawSchema {
+                column_schemas: vec![],
+                timestamp_index: None,
+                version: 0,
+            },
+            region_numbers: vec![],
+            primary_key_indices: vec![],
+            create_if_not_exists: false,
+            table_options: TableOptions::default(),
+            engine: "mito".to_string(),
+        };
+
+        let table_engine = Arc::new(MockTableEngine::default());
+        let _ = table_engine.create_table(ctx, request).await.unwrap();
+
+        let table_ident = TableIdent {
+            catalog: catalog.to_string(),
+            schema: schema.to_string(),
+            table: table.to_string(),
+            table_id,
+            engine: "mito".to_string(),
+        };
+        let (tx, rx) = mpsc::channel(10);
+        let mut task = CountdownTask {
+            table_engine: table_engine.clone(),
+            table_ident,
+            region: 1,
+            rx,
+        };
+        let _handle = common_runtime::spawn_bg(async move {
+            task.run().await;
+        });
+
+        async fn deadline(tx: &mpsc::Sender<CountdownCommand>) -> Instant {
+            let (s, r) = oneshot::channel();
+            tx.send(CountdownCommand::Deadline(s)).await.unwrap();
+            r.await.unwrap()
+        }
+
+        // if countdown task is not started, its deadline is set to far future
+        assert!(deadline(&tx).await > Instant::now() + Duration::from_secs(86400 * 365 * 29));
+
+        // start countdown in 250ms * 4 = 1s
+        tx.send(CountdownCommand::Start(250)).await.unwrap();
+        // assert deadline is correctly set
+        assert!(deadline(&tx).await <= Instant::now() + Duration::from_secs(1));
+
+        // reset countdown in 1.5s
+        tx.send(CountdownCommand::Reset(
+            Instant::now() + Duration::from_millis(1500),
+        ))
+        .await
+        .unwrap();
+
+        // assert the table is closed after deadline is reached
+        assert!(table_engine.table_exists(ctx, table_id));
+        // spare 500ms for the task to close the table
+        tokio::time::sleep(Duration::from_millis(2000)).await;
+        assert!(!table_engine.table_exists(ctx, table_id));
+    }
+}
--- a/src/catalog/src/schema.rs
+++ b/src/catalog/src/schema.rs
@@ -1,54 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::any::Any;
-use std::sync::Arc;
-
-use async_trait::async_trait;
-use table::TableRef;
-
-use crate::error::Result;
-
-/// Represents a schema, comprising a number of named tables.
-#[async_trait]
-pub trait SchemaProvider: Sync + Send {
-    /// Returns the schema provider as [`Any`](std::any::Any)
-    /// so that it can be downcast to a specific implementation.
-    fn as_any(&self) -> &dyn Any;
-
-    /// Retrieves the list of available table names in this schema.
-    fn table_names(&self) -> Result<Vec<String>>;
-
-    /// Retrieves a specific table from the schema by name, provided it exists.
-    async fn table(&self, name: &str) -> Result<Option<TableRef>>;
-
-    /// If supported by the implementation, adds a new table to this schema.
-    /// If a table of the same name existed before, it returns "Table already exists" error.
-    fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>>;
-
-    /// If supported by the implementation, renames an existing table from this schema and returns it.
-    /// If no table of that name exists, returns "Table not found" error.
-    fn rename_table(&self, name: &str, new_name: String) -> Result<TableRef>;
-
-    /// If supported by the implementation, removes an existing table from this schema and returns it.
-    /// If no table of that name exists, returns Ok(None).
-    fn deregister_table(&self, name: &str) -> Result<Option<TableRef>>;
-
-    /// If supported by the implementation, checks the table exist in the schema provider or not.
-    /// If no matched table in the schema provider, return false.
-    /// Otherwise, return true.
-    fn table_exist(&self, name: &str) -> Result<bool>;
-}
-
-pub type SchemaProviderRef = Arc<dyn SchemaProvider>;
--- a/src/catalog/src/system.rs
+++ b/src/catalog/src/system.rs
@@ -17,11 +17,9 @@ use std::collections::HashMap;
 use std::sync::Arc;

 use common_catalog::consts::{
-    DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_NAME,
-    SYSTEM_CATALOG_TABLE_ID, SYSTEM_CATALOG_TABLE_NAME,
+    DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MITO_ENGINE,
+    SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID, SYSTEM_CATALOG_TABLE_NAME,
 };
-use common_query::logical_plan::Expr;
-use common_query::physical_plan::{PhysicalPlanRef, SessionContext};
 use common_recordbatch::SendableRecordBatchStream;
 use common_telemetry::debug;
 use common_time::util;
@@ -30,12 +28,13 @@ use datatypes::schema::{ColumnSchema, RawSchema, SchemaRef};
 use datatypes::vectors::{BinaryVector, TimestampMillisecondVector, UInt8Vector};
 use serde::{Deserialize, Serialize};
 use snafu::{ensure, OptionExt, ResultExt};
+use store_api::storage::ScanRequest;
 use table::engine::{EngineContext, TableEngineRef};
 use table::metadata::{TableId, TableInfoRef};
 use table::requests::{
    CreateTableRequest, DeleteRequest, InsertRequest, OpenTableRequest, TableOptions,
 };
-use table::{Table, TableRef};
+use table::{Result as TableResult, Table, TableRef};

 use crate::error::{
    self, CreateSystemCatalogSnafu, EmptyValueSnafu, Error, InvalidEntryTypeSnafu, InvalidKeySnafu,
@@ -59,17 +58,12 @@ impl Table for SystemCatalogTable {
        self.0.schema()
    }

-    async fn scan(
-        &self,
-        projection: Option<&Vec<usize>>,
-        filters: &[Expr],
-        limit: Option<usize>,
-    ) -> table::Result<PhysicalPlanRef> {
-        self.0.scan(projection, filters, limit).await
+    async fn scan_to_stream(&self, request: ScanRequest) -> TableResult<SendableRecordBatchStream> {
+        self.0.scan_to_stream(request).await // forwards the request unchanged to the inner `self.0`
    }

    /// Insert values into table.
-    async fn insert(&self, request: InsertRequest) -> table::error::Result<usize> {
+    async fn insert(&self, request: InsertRequest) -> TableResult<usize> {
        self.0.insert(request).await
    }

@@ -77,9 +71,13 @@ impl Table for SystemCatalogTable {
        self.0.table_info()
    }

-    async fn delete(&self, request: DeleteRequest) -> table::Result<usize> {
+    async fn delete(&self, request: DeleteRequest) -> TableResult<usize> {
        self.0.delete(request).await
    }
+
+    fn statistics(&self) -> Option<table::stats::TableStatistics> {
+        self.0.statistics() // forwards to the inner `self.0`
+    }
 }

 impl SystemCatalogTable {
@@ -89,6 +87,7 @@ impl SystemCatalogTable {
            schema_name: INFORMATION_SCHEMA_NAME.to_string(),
            table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
            table_id: SYSTEM_CATALOG_TABLE_ID,
+            region_numbers: vec![0],
        };
        let schema = build_system_catalog_schema();
        let ctx = EngineContext::default();
@@ -112,6 +111,7 @@ impl SystemCatalogTable {
                primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX],
                create_if_not_exists: true,
                table_options: TableOptions::default(),
+                engine: engine.name().to_string(),
            };

            let table = engine
@@ -125,14 +125,17 @@ impl SystemCatalogTable {
    /// Create a stream of all entries inside system catalog table
    pub async fn records(&self) -> Result<SendableRecordBatchStream> {
        let full_projection = None;
-        let ctx = SessionContext::new();
-        let scan = self
-            .scan(full_projection, &[], None)
+        let scan_req = ScanRequest { // no filters, no output ordering, no limit
+            sequence: None,
+            projection: full_projection, // `None`; NOTE(review): presumably selects every column — confirm
+            filters: vec![],
+            output_ordering: None,
+            limit: None,
+        };
+        let stream = self
+            .scan_to_stream(scan_req) // scan failures are wrapped in `SystemCatalogTableScanSnafu` below
            .await
            .context(error::SystemCatalogTableScanSnafu)?;
-        let stream = scan
-            .execute(0, ctx.task_ctx())
-            .context(error::SystemCatalogTableScanExecSnafu)?;
        Ok(stream)
    }
 }
@@ -194,43 +197,56 @@ pub fn build_table_insert_request(
    schema: String,
    table_name: String,
    table_id: TableId,
+    engine: String,
 ) -> InsertRequest {
    let entry_key = format_table_entry_key(&catalog, &schema, table_id);
    build_insert_request(
        EntryType::Table,
        entry_key.as_bytes(),
-        serde_json::to_string(&TableEntryValue { table_name })
-            .unwrap()
-            .as_bytes(),
+        serde_json::to_string(&TableEntryValue {
+            table_name,
+            engine,
+            is_deleted: false,
+        })
+        .unwrap()
+        .as_bytes(),
    )
 }

 pub(crate) fn build_table_deletion_request(
    request: &DeregisterTableRequest,
    table_id: TableId,
-) -> DeleteRequest {
-    let table_key = format_table_entry_key(&request.catalog, &request.schema, table_id);
-    DeleteRequest {
-        key_column_values: build_primary_key_columns(EntryType::Table, table_key.as_bytes()),
-    }
+) -> InsertRequest { // a deletion is expressed as an insert of an entry flagged `is_deleted`
+    let entry_key = format_table_entry_key(&request.catalog, &request.schema, table_id);
+    build_insert_request(
+        EntryType::Table,
+        entry_key.as_bytes(), // same key format as the one built in `build_table_insert_request`
+        serde_json::to_string(&TableEntryValue {
+            table_name: "".to_string(), // name and engine are left empty in the deletion entry
+            engine: "".to_string(),
+            is_deleted: true, // marks the table entry under this key as deleted
+        })
+        .unwrap() // NOTE(review): presumably serializing this plain struct cannot fail — confirm
+        .as_bytes(),
+    )
 }

 fn build_primary_key_columns(entry_type: EntryType, key: &[u8]) -> HashMap<String, VectorRef> {
-    let mut m = HashMap::with_capacity(3);
-    m.insert(
-        "entry_type".to_string(),
-        Arc::new(UInt8Vector::from_slice([entry_type as u8])) as _,
-    );
-    m.insert(
-        "key".to_string(),
-        Arc::new(BinaryVector::from_slice(&[key])) as _,
-    );
-    // Timestamp in key part is intentionally left to 0
-    m.insert(
-        "timestamp".to_string(),
-        Arc::new(TimestampMillisecondVector::from_slice([0])) as _,
-    );
-    m
+    HashMap::from([ // one single-element vector per column: entry_type, key, timestamp
+        (
+            "entry_type".to_string(),
+            Arc::new(UInt8Vector::from_slice([entry_type as u8])) as VectorRef,
+        ),
+        (
+            "key".to_string(),
+            Arc::new(BinaryVector::from_slice(&[key])) as VectorRef,
+        ),
+        (
+            "timestamp".to_string(),
+            // The timestamp in the key part is intentionally left at 0.
+            Arc::new(TimestampMillisecondVector::from_slice([0])) as VectorRef,
+        ),
+    ])
 }

 pub fn build_schema_insert_request(catalog_name: String, schema_name: String) -> InsertRequest {
@@ -250,18 +266,18 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
    let mut columns_values = HashMap::with_capacity(6);
    columns_values.extend(primary_key_columns.into_iter());

-    columns_values.insert(
+    let _ = columns_values.insert(
        "value".to_string(),
        Arc::new(BinaryVector::from_slice(&[value])) as _,
    );

    let now = util::current_time_millis();
-    columns_values.insert(
+    let _ = columns_values.insert(
        "gmt_created".to_string(),
        Arc::new(TimestampMillisecondVector::from_slice([now])) as _,
    );

-    columns_values.insert(
+    let _ = columns_values.insert(
        "gmt_modified".to_string(),
        Arc::new(TimestampMillisecondVector::from_slice([now])) as _,
    );
@@ -330,6 +346,8 @@ pub fn decode_system_catalog(
                schema_name: table_parts[1].to_string(),
                table_name: table_meta.table_name,
                table_id,
+                engine: table_meta.engine,
+                is_deleted: table_meta.is_deleted,
            }))
        }
    }
@@ -385,11 +403,27 @@ pub struct TableEntry {
    pub schema_name: String,
    pub table_name: String,
    pub table_id: TableId,
+    pub engine: String,
+    pub is_deleted: bool,
 }

 #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
 pub struct TableEntryValue {
    pub table_name: String,
+
+    #[serde(default = "mito_engine")] // values stored without `engine` deserialize as MITO_ENGINE
+    pub engine: String,
+
+    #[serde(default = "not_deleted")] // values stored without `is_deleted` deserialize as false
+    pub is_deleted: bool,
+}
+
+fn mito_engine() -> String { // serde default for `TableEntryValue::engine`
+    MITO_ENGINE.to_string()
+}
+
+fn not_deleted() -> bool { // serde default for `TableEntryValue::is_deleted`
+    false
 }

 #[cfg(test)]
@@ -399,8 +433,8 @@ mod tests {
    use datatypes::value::Value;
    use log_store::NoopLogStore;
    use mito::config::EngineConfig;
-    use mito::engine::MitoEngine;
-    use object_store::{ObjectStore, ObjectStoreBuilder};
+    use mito::engine::{MitoEngine, MITO_ENGINE};
+    use object_store::ObjectStore;
    use storage::compaction::noop::NoopCompactionScheduler;
    use storage::config::EngineConfig as StorageEngineConfig;
    use storage::EngineImpl;
@@ -461,14 +495,13 @@ mod tests {
    }

    #[test]
-    #[should_panic]
    pub fn test_decode_mismatch() {
-        decode_system_catalog(
+        assert!(decode_system_catalog( // expects an `Err` (not a panic) when the last argument is `None`
            Some(EntryType::Table as u8),
            Some("some_catalog.some_schema.42".as_bytes()),
            None,
        )
-        .unwrap();
+        .is_err());
    }

    #[test]
@@ -482,11 +515,9 @@ mod tests {
    pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
        let dir = create_temp_dir("system-table-test");
        let store_dir = dir.path().to_string_lossy();
-        let accessor = object_store::services::Fs::default()
-            .root(&store_dir)
-            .build()
-            .unwrap();
-        let object_store = ObjectStore::new(accessor).finish();
+        let mut builder = object_store::services::Fs::default();
+        let _ = builder.root(&store_dir);
+        let object_store = ObjectStore::new(builder).unwrap().finish();
        let noop_compaction_scheduler = Arc::new(NoopCompactionScheduler::default());
        let table_engine = Arc::new(MitoEngine::new(
            EngineConfig::default(),
@@ -495,7 +526,8 @@ mod tests {
                Arc::new(NoopLogStore::default()),
                object_store.clone(),
                noop_compaction_scheduler,
-            ),
+            )
+            .unwrap(),
            object_store,
        ));
        (dir, table_engine)
@@ -530,6 +562,7 @@ mod tests {
            DEFAULT_SCHEMA_NAME.to_string(),
            "my_table".to_string(),
            1,
+            MITO_ENGINE.to_string(),
        );
        let result = catalog_table.insert(table_insertion).await.unwrap();
        assert_eq!(result, 1);
@@ -550,6 +583,8 @@ mod tests {
            schema_name: DEFAULT_SCHEMA_NAME.to_string(),
            table_name: "my_table".to_string(),
            table_id: 1,
+            engine: MITO_ENGINE.to_string(),
+            is_deleted: false,
        });
        assert_eq!(entry, expected);

@@ -561,11 +596,11 @@ mod tests {
            },
            1,
        );
-        let result = catalog_table.delete(table_deletion).await.unwrap();
+        let result = catalog_table.insert(table_deletion).await.unwrap();
        assert_eq!(result, 1);

        let records = catalog_table.records().await.unwrap();
        let batches = RecordBatches::try_collect(records).await.unwrap().take();
-        assert_eq!(batches.len(), 0);
+        assert_eq!(batches.len(), 1);
    }
 }
--- a/src/catalog/src/table_source.rs
+++ b/src/catalog/src/table_source.rs
@@ -15,21 +15,20 @@
 use std::collections::HashMap;
 use std::sync::Arc;

+use common_catalog::consts::INFORMATION_SCHEMA_NAME;
 use common_catalog::format_full_table_name;
-use datafusion::common::{OwnedTableReference, ResolvedTableReference, TableReference};
+use datafusion::common::{ResolvedTableReference, TableReference};
 use datafusion::datasource::provider_as_source;
 use datafusion::logical_expr::TableSource;
 use session::context::QueryContext;
 use snafu::{ensure, OptionExt};
 use table::table::adapter::DfTableProviderAdapter;

-use crate::error::{
-    CatalogNotFoundSnafu, QueryAccessDeniedSnafu, Result, SchemaNotFoundSnafu, TableNotExistSnafu,
-};
-use crate::CatalogListRef;
+use crate::error::{QueryAccessDeniedSnafu, Result, TableNotExistSnafu};
+use crate::CatalogManagerRef;

 pub struct DfTableSourceProvider {
-    catalog_list: CatalogListRef,
+    catalog_manager: CatalogManagerRef,
    resolved_tables: HashMap<String, Arc<dyn TableSource>>,
    disallow_cross_schema_query: bool,
    default_catalog: String,
@@ -38,12 +37,12 @@ pub struct DfTableSourceProvider {

 impl DfTableSourceProvider {
    pub fn new(
-        catalog_list: CatalogListRef,
+        catalog_manager: CatalogManagerRef,
        disallow_cross_schema_query: bool,
        query_ctx: &QueryContext,
    ) -> Self {
        Self {
-            catalog_list,
+            catalog_manager,
            disallow_cross_schema_query,
            resolved_tables: HashMap::new(),
            default_catalog: query_ctx.current_catalog(),
@@ -60,7 +59,8 @@ impl DfTableSourceProvider {
                TableReference::Bare { .. } => (),
                TableReference::Partial { schema, .. } => {
                    ensure!(
-                        schema.as_ref() == self.default_schema,
+                        schema.as_ref() == self.default_schema
+                            || schema.as_ref() == INFORMATION_SCHEMA_NAME,
                        QueryAccessDeniedSnafu {
                            catalog: &self.default_catalog,
                            schema: schema.as_ref(),
@@ -72,7 +72,8 @@ impl DfTableSourceProvider {
                } => {
                    ensure!(
                        catalog.as_ref() == self.default_catalog
-                            && schema.as_ref() == self.default_schema,
+                            && (schema.as_ref() == self.default_schema
+                                || schema.as_ref() == INFORMATION_SCHEMA_NAME),
                        QueryAccessDeniedSnafu {
                            catalog: catalog.as_ref(),
                            schema: schema.as_ref()
@@ -87,9 +88,8 @@ impl DfTableSourceProvider {

    pub async fn resolve_table(
        &mut self,
-        table_ref: OwnedTableReference,
+        table_ref: TableReference<'_>,
    ) -> Result<Arc<dyn TableSource>> {
-        let table_ref = table_ref.as_table_reference();
        let table_ref = self.resolve_table_ref(table_ref)?;

        let resolved_name = table_ref.to_string();
@@ -101,25 +101,18 @@ impl DfTableSourceProvider {
        let schema_name = table_ref.schema.as_ref();
        let table_name = table_ref.table.as_ref();

-        let catalog = self
-            .catalog_list
-            .catalog(catalog_name)?
-            .context(CatalogNotFoundSnafu { catalog_name })?;
-        let schema = catalog.schema(schema_name)?.context(SchemaNotFoundSnafu {
-            catalog: catalog_name,
-            schema: schema_name,
-        })?;
-        let table = schema
-            .table(table_name)
+        let table = self
+            .catalog_manager
+            .table(catalog_name, schema_name, table_name)
            .await?
            .with_context(|| TableNotExistSnafu {
                table: format_full_table_name(catalog_name, schema_name, table_name),
            })?;

-        let table = DfTableProviderAdapter::new(table);
-        let table = provider_as_source(Arc::new(table));
-        self.resolved_tables.insert(resolved_name, table.clone());
-        Ok(table)
+        let provider = DfTableProviderAdapter::new(table);
+        let source = provider_as_source(Arc::new(provider));
+        let _ = self.resolved_tables.insert(resolved_name, source.clone());
+        Ok(source)
    }
 }

@@ -143,14 +136,14 @@ mod tests {
            table: Cow::Borrowed("table_name"),
        };
        let result = table_provider.resolve_table_ref(table_ref);
-        assert!(result.is_ok());
+        let _ = result.unwrap();

        let table_ref = TableReference::Partial {
            schema: Cow::Borrowed("public"),
            table: Cow::Borrowed("table_name"),
        };
        let result = table_provider.resolve_table_ref(table_ref);
-        assert!(result.is_ok());
+        let _ = result.unwrap();

        let table_ref = TableReference::Partial {
            schema: Cow::Borrowed("wrong_schema"),
@@ -165,7 +158,7 @@ mod tests {
            table: Cow::Borrowed("table_name"),
        };
        let result = table_provider.resolve_table_ref(table_ref);
-        assert!(result.is_ok());
+        let _ = result.unwrap();

        let table_ref = TableReference::Full {
            catalog: Cow::Borrowed("wrong_catalog"),
@@ -174,5 +167,25 @@ mod tests {
        };
        let result = table_provider.resolve_table_ref(table_ref);
        assert!(result.is_err());
+
+        let table_ref = TableReference::Partial {
+            schema: Cow::Borrowed("information_schema"),
+            table: Cow::Borrowed("columns"),
+        };
+        let _ = table_provider.resolve_table_ref(table_ref).unwrap();
+
+        let table_ref = TableReference::Full {
+            catalog: Cow::Borrowed("greptime"),
+            schema: Cow::Borrowed("information_schema"),
+            table: Cow::Borrowed("columns"),
+        };
+        let _ = table_provider.resolve_table_ref(table_ref).unwrap();
+
+        let table_ref = TableReference::Full {
+            catalog: Cow::Borrowed("dummy"),
+            schema: Cow::Borrowed("information_schema"),
+            table: Cow::Borrowed("columns"),
+        };
+        assert!(table_provider.resolve_table_ref(table_ref).is_err());
    }
 }
--- a/src/catalog/src/tables.rs
+++ b/src/catalog/src/tables.rs
@@ -14,250 +14,31 @@

 // The `tables` table in system catalog keeps a record of all tables created by user.

-use std::any::Any;
-use std::pin::Pin;
 use std::sync::Arc;
-use std::task::{Context, Poll};

-use async_stream::stream;
-use async_trait::async_trait;
-use common_catalog::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
-use common_error::ext::BoxedError;
-use common_query::logical_plan::Expr;
-use common_query::physical_plan::PhysicalPlanRef;
-use common_recordbatch::error::Result as RecordBatchResult;
-use common_recordbatch::{RecordBatch, RecordBatchStream};
-use datatypes::prelude::{ConcreteDataType, DataType};
-use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
-use datatypes::value::ValueRef;
-use datatypes::vectors::VectorRef;
-use futures::Stream;
+use common_telemetry::logging;
 use snafu::ResultExt;
-use table::engine::TableEngineRef;
-use table::error::TablesRecordBatchSnafu;
-use table::metadata::{TableId, TableInfoRef};
-use table::table::scan::SimpleTableScan;
-use table::{Table, TableRef};
+use table::metadata::TableId;
+use table::Table;

-use crate::error::{self, Error, InsertCatalogRecordSnafu, Result as CatalogResult};
+use crate::error::{self, InsertCatalogRecordSnafu, Result as CatalogResult};
 use crate::system::{
    build_schema_insert_request, build_table_deletion_request, build_table_insert_request,
    SystemCatalogTable,
 };
-use crate::{
-    CatalogListRef, CatalogProvider, DeregisterTableRequest, SchemaProvider, SchemaProviderRef,
-};
-
-/// Tables holds all tables created by user.
-pub struct Tables {
-    schema: SchemaRef,
-    catalogs: CatalogListRef,
-    engine_name: String,
-}
-
-impl Tables {
-    pub fn new(catalogs: CatalogListRef, engine_name: String) -> Self {
-        Self {
-            schema: Arc::new(build_schema_for_tables()),
-            catalogs,
-            engine_name,
-        }
-    }
-}
-
-#[async_trait::async_trait]
-impl Table for Tables {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn table_info(&self) -> TableInfoRef {
-        unreachable!("Tables does not support table_info method")
-    }
-
-    async fn scan(
-        &self,
-        _projection: Option<&Vec<usize>>,
-        _filters: &[Expr],
-        _limit: Option<usize>,
-    ) -> table::error::Result<PhysicalPlanRef> {
-        let catalogs = self.catalogs.clone();
-        let schema_ref = self.schema.clone();
-        let engine_name = self.engine_name.clone();
-
-        let stream = stream!({
-            for catalog_name in catalogs
-                .catalog_names()
-                .map_err(BoxedError::new)
-                .context(TablesRecordBatchSnafu)?
-            {
-                let catalog = catalogs
-                    .catalog(&catalog_name)
-                    .map_err(BoxedError::new)
-                    .context(TablesRecordBatchSnafu)?
-                    .unwrap();
-                for schema_name in catalog
-                    .schema_names()
-                    .map_err(BoxedError::new)
-                    .context(TablesRecordBatchSnafu)?
-                {
-                    let mut tables_in_schema = Vec::with_capacity(
-                        catalog
-                            .schema_names()
-                            .map_err(BoxedError::new)
-                            .context(TablesRecordBatchSnafu)?
-                            .len(),
-                    );
-                    let schema = catalog
-                        .schema(&schema_name)
-                        .map_err(BoxedError::new)
-                        .context(TablesRecordBatchSnafu)?
-                        .unwrap();
-                    for table_name in schema
-                        .table_names()
-                        .map_err(BoxedError::new)
-                        .context(TablesRecordBatchSnafu)?
-                    {
-                        tables_in_schema.push(table_name);
-                    }
-
-                    let vec = tables_to_record_batch(
-                        &catalog_name,
-                        &schema_name,
-                        tables_in_schema,
-                        &engine_name,
-                    );
-                    let record_batch_res = RecordBatch::new(schema_ref.clone(), vec);
-                    yield record_batch_res;
-                }
-            }
-        });
-
-        let stream = Box::pin(TablesRecordBatchStream {
-            schema: self.schema.clone(),
-            stream: Box::pin(stream),
-        });
-        Ok(Arc::new(SimpleTableScan::new(stream)))
-    }
-}
-
-/// Convert tables info to `RecordBatch`.
-fn tables_to_record_batch(
-    catalog_name: &str,
-    schema_name: &str,
-    table_names: Vec<String>,
-    engine: &str,
-) -> Vec<VectorRef> {
-    let mut catalog_vec =
-        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
-    let mut schema_vec =
-        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
-    let mut table_name_vec =
-        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
-    let mut engine_vec =
-        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
-
-    for table_name in table_names {
-        // Safety: All these vectors are string type.
-        catalog_vec.push_value_ref(ValueRef::String(catalog_name));
-        schema_vec.push_value_ref(ValueRef::String(schema_name));
-        table_name_vec.push_value_ref(ValueRef::String(&table_name));
-        engine_vec.push_value_ref(ValueRef::String(engine));
-    }
-
-    vec![
-        catalog_vec.to_vector(),
-        schema_vec.to_vector(),
-        table_name_vec.to_vector(),
-        engine_vec.to_vector(),
-    ]
-}
-
-pub struct TablesRecordBatchStream {
-    schema: SchemaRef,
-    stream: Pin<Box<dyn Stream<Item = RecordBatchResult<RecordBatch>> + Send>>,
-}
-
-impl Stream for TablesRecordBatchStream {
-    type Item = RecordBatchResult<RecordBatch>;
-
-    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        Pin::new(&mut self.stream).poll_next(cx)
-    }
-}
-
-impl RecordBatchStream for TablesRecordBatchStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
+use crate::DeregisterTableRequest;

 pub struct InformationSchema {
-    pub tables: Arc<Tables>,
    pub system: Arc<SystemCatalogTable>,
 }

-#[async_trait]
-impl SchemaProvider for InformationSchema {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn table_names(&self) -> Result<Vec<String>, Error> {
-        Ok(vec![
-            "tables".to_string(),
-            SYSTEM_CATALOG_TABLE_NAME.to_string(),
-        ])
-    }
-
-    async fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
-        if name.eq_ignore_ascii_case("tables") {
-            Ok(Some(self.tables.clone()))
-        } else if name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME) {
-            Ok(Some(self.system.clone()))
-        } else {
-            Ok(None)
-        }
-    }
-
-    fn register_table(
-        &self,
-        _name: String,
-        _table: TableRef,
-    ) -> crate::error::Result<Option<TableRef>> {
-        panic!("System catalog & schema does not support register table")
-    }
-
-    fn rename_table(&self, _name: &str, _new_name: String) -> crate::error::Result<TableRef> {
-        unimplemented!("System catalog & schema does not support rename table")
-    }
-
-    fn deregister_table(&self, _name: &str) -> crate::error::Result<Option<TableRef>> {
-        panic!("System catalog & schema does not support deregister table")
-    }
-
-    fn table_exist(&self, name: &str) -> Result<bool, Error> {
-        Ok(name.eq_ignore_ascii_case("tables")
-            || name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME))
-    }
-}
-
 pub struct SystemCatalog {
    pub information_schema: Arc<InformationSchema>,
 }

 impl SystemCatalog {
-    pub fn new(
-        system: SystemCatalogTable,
-        catalogs: CatalogListRef,
-        engine: TableEngineRef,
-    ) -> Self {
+    pub(crate) fn new(system: SystemCatalogTable) -> Self {
        let schema = InformationSchema {
-            tables: Arc::new(Tables::new(catalogs, engine.name().to_string())),
            system: Arc::new(system),
        };
        Self {
@@ -271,8 +52,9 @@ impl SystemCatalog {
        schema: String,
        table_name: String,
        table_id: TableId,
+        engine: String,
    ) -> crate::error::Result<usize> {
-        let request = build_table_insert_request(catalog, schema, table_name, table_id);
+        let request = build_table_insert_request(catalog, schema, table_name, table_id, engine);
        self.information_schema
            .system
            .insert(request)
@@ -284,12 +66,21 @@ impl SystemCatalog {
        &self,
        request: &DeregisterTableRequest,
        table_id: TableId,
-    ) -> CatalogResult<bool> {
+    ) -> CatalogResult<()> {
        self.information_schema
            .system
-            .delete(build_table_deletion_request(request, table_id))
+            .insert(build_table_deletion_request(request, table_id))
            .await
-            .map(|x| x == 1)
+            .map(|x| {
+                if x != 1 {
+                    let table = common_catalog::format_full_table_name(
+                        &request.catalog,
+                        &request.schema,
+                        &request.table_name
+                    );
+                    logging::warn!("Failed to delete table record from information_schema, unexpected returned result: {x}, table: {table}");
+                }
+            })
            .with_context(|_| error::DeregisterTableSnafu {
                request: request.clone(),
            })
@@ -308,130 +99,3 @@ impl SystemCatalog {
            .context(InsertCatalogRecordSnafu)
    }
 }
-
-impl CatalogProvider for SystemCatalog {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema_names(&self) -> Result<Vec<String>, Error> {
-        Ok(vec![INFORMATION_SCHEMA_NAME.to_string()])
-    }
-
-    fn register_schema(
-        &self,
-        _name: String,
-        _schema: SchemaProviderRef,
-    ) -> Result<Option<SchemaProviderRef>, Error> {
-        panic!("System catalog does not support registering schema!")
-    }
-
-    fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>, Error> {
-        if name.eq_ignore_ascii_case(INFORMATION_SCHEMA_NAME) {
-            Ok(Some(self.information_schema.clone()))
-        } else {
-            Ok(None)
-        }
-    }
-}
-
-fn build_schema_for_tables() -> Schema {
-    let cols = vec![
-        ColumnSchema::new(
-            "catalog".to_string(),
-            ConcreteDataType::string_datatype(),
-            false,
-        ),
-        ColumnSchema::new(
-            "schema".to_string(),
-            ConcreteDataType::string_datatype(),
-            false,
-        ),
-        ColumnSchema::new(
-            "table_name".to_string(),
-            ConcreteDataType::string_datatype(),
-            false,
-        ),
-        ColumnSchema::new(
-            "engine".to_string(),
-            ConcreteDataType::string_datatype(),
-            false,
-        ),
-    ];
-    Schema::new(cols)
-}
-
-#[cfg(test)]
-mod tests {
-    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
-    use common_query::physical_plan::SessionContext;
-    use futures_util::StreamExt;
-    use table::table::numbers::NumbersTable;
-
-    use super::*;
-    use crate::local::memory::new_memory_catalog_list;
-    use crate::CatalogList;
-
-    #[tokio::test]
-    async fn test_tables() {
-        let catalog_list = new_memory_catalog_list().unwrap();
-        let schema = catalog_list
-            .catalog(DEFAULT_CATALOG_NAME)
-            .unwrap()
-            .unwrap()
-            .schema(DEFAULT_SCHEMA_NAME)
-            .unwrap()
-            .unwrap();
-        schema
-            .register_table("test_table".to_string(), Arc::new(NumbersTable::default()))
-            .unwrap();
-
-        let tables = Tables::new(catalog_list, "test_engine".to_string());
-        let tables_stream = tables.scan(None, &[], None).await.unwrap();
-        let session_ctx = SessionContext::new();
-        let mut tables_stream = tables_stream.execute(0, session_ctx.task_ctx()).unwrap();
-
-        if let Some(t) = tables_stream.next().await {
-            let batch = t.unwrap();
-            assert_eq!(1, batch.num_rows());
-            assert_eq!(4, batch.num_columns());
-            assert_eq!(
-                ConcreteDataType::string_datatype(),
-                batch.column(0).data_type()
-            );
-            assert_eq!(
-                ConcreteDataType::string_datatype(),
-                batch.column(1).data_type()
-            );
-            assert_eq!(
-                ConcreteDataType::string_datatype(),
-                batch.column(2).data_type()
-            );
-            assert_eq!(
-                ConcreteDataType::string_datatype(),
-                batch.column(3).data_type()
-            );
-            assert_eq!(
-                "greptime",
-                batch.column(0).get_ref(0).as_string().unwrap().unwrap()
-            );
-
-            assert_eq!(
-                "public",
-                batch.column(1).get_ref(0).as_string().unwrap().unwrap()
-            );
-
-            assert_eq!(
-                "test_table",
-                batch.column(2).get_ref(0).as_string().unwrap().unwrap()
-            );
-
-            assert_eq!(
-                "test_engine",
-                batch.column(3).get_ref(0).as_string().unwrap().unwrap()
-            );
-        } else {
-            panic!("Record batch should not be empty!")
-        }
-    }
-}
--- a/src/catalog/tests/local_catalog_tests.rs
+++ b/src/catalog/tests/local_catalog_tests.rs
@@ -20,28 +20,32 @@ mod tests {
    use catalog::{CatalogManager, RegisterTableRequest, RenameTableRequest};
    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
    use common_telemetry::{error, info};
+    use common_test_util::temp_dir::TempDir;
    use mito::config::EngineConfig;
+    use table::engine::manager::MemoryTableEngineManager;
    use table::table::numbers::NumbersTable;
    use table::TableRef;
    use tokio::sync::Mutex;

-    async fn create_local_catalog_manager() -> Result<LocalCatalogManager, catalog::error::Error> {
-        let (_dir, object_store) =
+    async fn create_local_catalog_manager(
+    ) -> Result<(TempDir, LocalCatalogManager), catalog::error::Error> {
+        let (dir, object_store) =
            mito::table::test_util::new_test_object_store("setup_mock_engine_and_table").await;
        let mock_engine = Arc::new(mito::table::test_util::MockMitoEngine::new(
            EngineConfig::default(),
            mito::table::test_util::MockEngine::default(),
            object_store,
        ));
-        let catalog_manager = LocalCatalogManager::try_new(mock_engine).await.unwrap();
+        let engine_manager = Arc::new(MemoryTableEngineManager::new(mock_engine.clone()));
+        let catalog_manager = LocalCatalogManager::try_new(engine_manager).await.unwrap();
        catalog_manager.start().await?;
-        Ok(catalog_manager)
+        Ok((dir, catalog_manager))
    }

    #[tokio::test]
    async fn test_rename_table() {
        common_telemetry::init_default_ut_logging();
-        let catalog_manager = create_local_catalog_manager().await.unwrap();
+        let (_dir, catalog_manager) = create_local_catalog_manager().await.unwrap();
        // register table
        let table_name = "test_table";
        let table_id = 42;
@@ -79,7 +83,7 @@ mod tests {

    #[tokio::test]
    async fn test_duplicate_register() {
-        let catalog_manager = create_local_catalog_manager().await.unwrap();
+        let (_dir, catalog_manager) = create_local_catalog_manager().await.unwrap();
        let request = RegisterTableRequest {
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
@@ -116,8 +120,9 @@ mod tests {
    fn test_concurrent_register() {
        common_telemetry::init_default_ut_logging();
        let rt = Arc::new(tokio::runtime::Builder::new_multi_thread().build().unwrap());
-        let catalog_manager =
-            Arc::new(rt.block_on(async { create_local_catalog_manager().await.unwrap() }));
+        let (_dir, catalog_manager) =
+            rt.block_on(async { create_local_catalog_manager().await.unwrap() });
+        let catalog_manager = Arc::new(catalog_manager);

        let succeed: Arc<Mutex<Option<TableRef>>> = Arc::new(Mutex::new(None));

--- a/src/catalog/tests/mock.rs
+++ b/src/catalog/tests/mock.rs
@@ -1,228 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::collections::btree_map::Entry;
-use std::collections::{BTreeMap, HashMap};
-use std::fmt::{Display, Formatter};
-use std::str::FromStr;
-use std::sync::Arc;
-
-use async_stream::stream;
-use catalog::error::Error;
-use catalog::remote::{Kv, KvBackend, ValueIter};
-use common_recordbatch::RecordBatch;
-use common_telemetry::logging::info;
-use datatypes::data_type::ConcreteDataType;
-use datatypes::schema::{ColumnSchema, Schema};
-use datatypes::vectors::StringVector;
-use serde::Serializer;
-use table::engine::{EngineContext, TableEngine, TableReference};
-use table::metadata::TableId;
-use table::requests::{AlterTableRequest, CreateTableRequest, DropTableRequest, OpenTableRequest};
-use table::test_util::MemTable;
-use table::TableRef;
-use tokio::sync::RwLock;
-
-#[derive(Default)]
-pub struct MockKvBackend {
-    map: RwLock<BTreeMap<Vec<u8>, Vec<u8>>>,
-}
-
-impl Display for MockKvBackend {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        futures::executor::block_on(async {
-            let map = self.map.read().await;
-            for (k, v) in map.iter() {
-                f.serialize_str(&String::from_utf8_lossy(k))?;
-                f.serialize_str(" -> ")?;
-                f.serialize_str(&String::from_utf8_lossy(v))?;
-                f.serialize_str("\n")?;
-            }
-            Ok(())
-        })
-    }
-}
-
-#[async_trait::async_trait]
-impl KvBackend for MockKvBackend {
-    fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
-    where
-        'a: 'b,
-    {
-        let prefix = key.to_vec();
-        let prefix_string = String::from_utf8_lossy(&prefix).to_string();
-        Box::pin(stream!({
-            let maps = self.map.read().await.clone();
-            for (k, v) in maps.range(prefix.clone()..) {
-                let key_string = String::from_utf8_lossy(k).to_string();
-                let matches = key_string.starts_with(&prefix_string);
-                if matches {
-                    yield Ok(Kv(k.clone(), v.clone()))
-                } else {
-                    info!("Stream finished");
-                    return;
-                }
-            }
-        }))
-    }
-
-    async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error> {
-        let mut map = self.map.write().await;
-        map.insert(key.to_vec(), val.to_vec());
-        Ok(())
-    }
-
-    async fn compare_and_set(
-        &self,
-        key: &[u8],
-        expect: &[u8],
-        val: &[u8],
-    ) -> Result<Result<(), Option<Vec<u8>>>, Error> {
-        let mut map = self.map.write().await;
-        let existing = map.entry(key.to_vec());
-        match existing {
-            Entry::Vacant(e) => {
-                if expect.is_empty() {
-                    e.insert(val.to_vec());
-                    Ok(Ok(()))
-                } else {
-                    Ok(Err(None))
-                }
-            }
-            Entry::Occupied(mut existing) => {
-                if existing.get() == expect {
-                    existing.insert(val.to_vec());
-                    Ok(Ok(()))
-                } else {
-                    Ok(Err(Some(existing.get().clone())))
-                }
-            }
-        }
-    }
-
-    async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
-        let start = key.to_vec();
-        let end = end.to_vec();
-        let range = start..end;
-
-        let mut map = self.map.write().await;
-        map.retain(|k, _| !range.contains(k));
-        Ok(())
-    }
-}
-
-#[derive(Default)]
-pub struct MockTableEngine {
-    tables: RwLock<HashMap<String, TableRef>>,
-}
-
-#[async_trait::async_trait]
-impl TableEngine for MockTableEngine {
-    fn name(&self) -> &str {
-        "MockTableEngine"
-    }
-
-    /// Create a table with only one column
-    async fn create_table(
-        &self,
-        _ctx: &EngineContext,
-        request: CreateTableRequest,
-    ) -> table::Result<TableRef> {
-        let table_name = request.table_name.clone();
-        let catalog_name = request.catalog_name.clone();
-        let schema_name = request.schema_name.clone();
-
-        let default_table_id = "0".to_owned();
-        let table_id = TableId::from_str(
-            request
-                .table_options
-                .extra_options
-                .get("table_id")
-                .unwrap_or(&default_table_id),
-        )
-        .unwrap();
-        let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
-            "name",
-            ConcreteDataType::string_datatype(),
-            true,
-        )]));
-
-        let data = vec![Arc::new(StringVector::from(vec!["a", "b", "c"])) as _];
-        let record_batch = RecordBatch::new(schema, data).unwrap();
-        let table: TableRef = Arc::new(MemTable::new_with_catalog(
-            &table_name,
-            record_batch,
-            table_id,
-            catalog_name,
-            schema_name,
-            vec![0],
-        )) as Arc<_>;
-
-        let mut tables = self.tables.write().await;
-        tables.insert(table_name, table.clone() as TableRef);
-        Ok(table)
-    }
-
-    async fn open_table(
-        &self,
-        _ctx: &EngineContext,
-        request: OpenTableRequest,
-    ) -> table::Result<Option<TableRef>> {
-        Ok(self.tables.read().await.get(&request.table_name).cloned())
-    }
-
-    async fn alter_table(
-        &self,
-        _ctx: &EngineContext,
-        _request: AlterTableRequest,
-    ) -> table::Result<TableRef> {
-        unimplemented!()
-    }
-
-    fn get_table(
-        &self,
-        _ctx: &EngineContext,
-        table_ref: &TableReference,
-    ) -> table::Result<Option<TableRef>> {
-        futures::executor::block_on(async {
-            Ok(self
-                .tables
-                .read()
-                .await
-                .get(&table_ref.to_string())
-                .cloned())
-        })
-    }
-
-    fn table_exists(&self, _ctx: &EngineContext, table_ref: &TableReference) -> bool {
-        futures::executor::block_on(async {
-            self.tables
-                .read()
-                .await
-                .contains_key(&table_ref.to_string())
-        })
-    }
-
-    async fn drop_table(
-        &self,
-        _ctx: &EngineContext,
-        _request: DropTableRequest,
-    ) -> table::Result<bool> {
-        unimplemented!()
-    }
-
-    async fn close(&self) -> table::Result<()> {
-        Ok(())
-    }
-}
--- a/src/catalog/tests/remote_catalog_tests.rs
+++ b/src/catalog/tests/remote_catalog_tests.rs
@@ -14,95 +14,177 @@

 #![feature(assert_matches)]

-mod mock;
-
 #[cfg(test)]
 mod tests {
    use std::assert_matches::assert_matches;
    use std::collections::HashSet;
    use std::sync::Arc;
+    use std::time::Duration;

-    use catalog::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
-    use catalog::remote::{
-        KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider,
-    };
-    use catalog::{CatalogList, CatalogManager, RegisterTableRequest};
-    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+    use catalog::error::Error;
+    use catalog::remote::mock::MockTableEngine;
+    use catalog::remote::region_alive_keeper::RegionAliveKeepers;
+    use catalog::remote::{CachedMetaKvBackend, RemoteCatalogManager};
+    use catalog::{CatalogManager, RegisterSchemaRequest, RegisterTableRequest};
+    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO_ENGINE};
+    use common_meta::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
+    use common_meta::ident::TableIdent;
+    use common_meta::key::TableMetadataManager;
+    use common_meta::kv_backend::memory::MemoryKvBackend;
+    use common_meta::kv_backend::KvBackend;
+    use common_meta::rpc::store::{CompareAndPutRequest, PutRequest, RangeRequest};
    use datatypes::schema::RawSchema;
-    use futures_util::StreamExt;
+    use table::engine::manager::{MemoryTableEngineManager, TableEngineManagerRef};
    use table::engine::{EngineContext, TableEngineRef};
    use table::requests::CreateTableRequest;
+    use table::test_util::EmptyTable;
+    use tokio::time::Instant;

-    use crate::mock::{MockKvBackend, MockTableEngine};
+    struct TestingComponents {
+        catalog_manager: Arc<RemoteCatalogManager>,
+        table_engine_manager: TableEngineManagerRef,
+        region_alive_keepers: Arc<RegionAliveKeepers>,
+    }
+
+    impl TestingComponents {
+        fn table_engine(&self) -> TableEngineRef {
+            self.table_engine_manager.engine(MITO_ENGINE).unwrap()
+        }
+    }

    #[tokio::test]
    async fn test_backend() {
-        common_telemetry::init_default_ut_logging();
-        let backend = MockKvBackend::default();
+        let backend = MemoryKvBackend::<Error>::default();

        let default_catalog_key = CatalogKey {
            catalog_name: DEFAULT_CATALOG_NAME.to_string(),
        }
        .to_string();
-
-        backend
-            .set(
-                default_catalog_key.as_bytes(),
-                &CatalogValue {}.as_bytes().unwrap(),
-            )
-            .await
-            .unwrap();
+        let req = PutRequest::new()
+            .with_key(default_catalog_key.as_bytes())
+            .with_value(CatalogValue.as_bytes().unwrap());
+        backend.put(req).await.unwrap();

        let schema_key = SchemaKey {
            catalog_name: DEFAULT_CATALOG_NAME.to_string(),
            schema_name: DEFAULT_SCHEMA_NAME.to_string(),
        }
        .to_string();
-        backend
-            .set(schema_key.as_bytes(), &SchemaValue {}.as_bytes().unwrap())
-            .await
-            .unwrap();
+        let req = PutRequest::new()
+            .with_key(schema_key.as_bytes())
+            .with_value(SchemaValue.as_bytes().unwrap());
+        backend.put(req).await.unwrap();

-        let mut iter = backend.range("__c-".as_bytes());
-        let mut res = HashSet::new();
-        while let Some(r) = iter.next().await {
-            let kv = r.unwrap();
-            res.insert(String::from_utf8_lossy(&kv.0).to_string());
-        }
+        let req = RangeRequest::new().with_prefix(b"__c-".to_vec());
+        let res = backend
+            .range(req)
+            .await
+            .unwrap()
+            .kvs
+            .into_iter()
+            .map(|kv| String::from_utf8_lossy(kv.key()).to_string());
        assert_eq!(
            vec!["__c-greptime".to_string()],
            res.into_iter().collect::<Vec<_>>()
        );
    }

-    async fn prepare_components(
-        node_id: u64,
-    ) -> (KvBackendRef, TableEngineRef, Arc<RemoteCatalogManager>) {
-        let backend = Arc::new(MockKvBackend::default()) as KvBackendRef;
+    #[tokio::test]
+    async fn test_cached_backend() {
+        let backend = CachedMetaKvBackend::wrap(Arc::new(MemoryKvBackend::default()));
+
+        let default_catalog_key = CatalogKey {
+            catalog_name: DEFAULT_CATALOG_NAME.to_string(),
+        }
+        .to_string();
+        let req = PutRequest::new()
+            .with_key(default_catalog_key.as_bytes())
+            .with_value(CatalogValue.as_bytes().unwrap());
+        backend.put(req).await.unwrap();
+
+        let ret = backend.get(b"__c-greptime").await.unwrap();
+        let _ = ret.unwrap();
+
+        let req = CompareAndPutRequest::new()
+            .with_key(b"__c-greptime".to_vec())
+            .with_expect(CatalogValue.as_bytes().unwrap())
+            .with_value(b"123".to_vec());
+        let _ = backend.compare_and_put(req).await.unwrap();
+
+        let ret = backend.get(b"__c-greptime").await.unwrap();
+        assert_eq!(b"123", ret.as_ref().unwrap().value.as_slice());
+
+        let req = PutRequest::new()
+            .with_key(b"__c-greptime".to_vec())
+            .with_value(b"1234".to_vec());
+        let _ = backend.put(req).await;
+
+        let ret = backend.get(b"__c-greptime").await.unwrap();
+        assert_eq!(b"1234", ret.unwrap().value.as_slice());
+
+        backend.delete(b"__c-greptime", false).await.unwrap();
+
+        let ret = backend.get(b"__c-greptime").await.unwrap();
+        assert!(ret.is_none());
+    }
+
+    async fn prepare_components(node_id: u64) -> TestingComponents {
+        let backend = Arc::new(MemoryKvBackend::default());
+
+        let req = PutRequest::new()
+            .with_key(b"__c-greptime".to_vec())
+            .with_value(b"".to_vec());
+        backend.put(req).await.unwrap();
+
+        let req = PutRequest::new()
+            .with_key(b"__s-greptime-public".to_vec())
+            .with_value(b"".to_vec());
+        backend.put(req).await.unwrap();
+
+        let cached_backend = Arc::new(CachedMetaKvBackend::wrap(backend));
+
        let table_engine = Arc::new(MockTableEngine::default());
-        let catalog_manager =
-            RemoteCatalogManager::new(table_engine.clone(), node_id, backend.clone());
+        let engine_manager = Arc::new(MemoryTableEngineManager::alias(
+            MITO_ENGINE.to_string(),
+            table_engine,
+        ));
+
+        let region_alive_keepers = Arc::new(RegionAliveKeepers::new(engine_manager.clone(), 5000));
+
+        let catalog_manager = RemoteCatalogManager::new(
+            engine_manager.clone(),
+            node_id,
+            cached_backend.clone(),
+            region_alive_keepers.clone(),
+            Arc::new(TableMetadataManager::new(cached_backend)),
+        );
        catalog_manager.start().await.unwrap();
-        (backend, table_engine, Arc::new(catalog_manager))
+
+        TestingComponents {
+            catalog_manager: Arc::new(catalog_manager),
+            table_engine_manager: engine_manager,
+            region_alive_keepers,
+        }
    }

    #[tokio::test]
    async fn test_remote_catalog_default() {
        common_telemetry::init_default_ut_logging();
        let node_id = 42;
-        let (_, _, catalog_manager) = prepare_components(node_id).await;
+        let TestingComponents {
+            catalog_manager, ..
+        } = prepare_components(node_id).await;
        assert_eq!(
            vec![DEFAULT_CATALOG_NAME.to_string()],
-            catalog_manager.catalog_names().unwrap()
+            catalog_manager.catalog_names().await.unwrap()
        );

-        let default_catalog = catalog_manager
-            .catalog(DEFAULT_CATALOG_NAME)
-            .unwrap()
-            .unwrap();
        assert_eq!(
            vec![DEFAULT_SCHEMA_NAME.to_string()],
-            default_catalog.schema_names().unwrap()
+            catalog_manager
+                .schema_names(DEFAULT_CATALOG_NAME)
+                .await
+                .unwrap()
        );
    }

@@ -110,14 +192,16 @@ mod tests {
    async fn test_remote_catalog_register_nonexistent() {
        common_telemetry::init_default_ut_logging();
        let node_id = 42;
-        let (_, table_engine, catalog_manager) = prepare_components(node_id).await;
+        let components = prepare_components(node_id).await;
+
        // register a new table with an nonexistent catalog
        let catalog_name = "nonexistent_catalog".to_string();
        let schema_name = "nonexistent_schema".to_string();
        let table_name = "fail_table".to_string();
        // this schema has no effect
        let table_schema = RawSchema::new(vec![]);
-        let table = table_engine
+        let table = components
+            .table_engine()
            .create_table(
                &EngineContext {},
                CreateTableRequest {
@@ -131,6 +215,7 @@ mod tests {
                    primary_key_indices: vec![],
                    create_if_not_exists: false,
                    table_options: Default::default(),
+                    engine: MITO_ENGINE.to_string(),
                },
            )
            .await
@@ -142,7 +227,7 @@ mod tests {
            table_id: 1,
            table,
        };
-        let res = catalog_manager.register_table(reg_req).await;
+        let res = components.catalog_manager.register_table(reg_req).await;

        // because nonexistent_catalog does not exist yet.
        assert_matches!(
@@ -154,22 +239,16 @@ mod tests {
    #[tokio::test]
    async fn test_register_table() {
        let node_id = 42;
-        let (_, table_engine, catalog_manager) = prepare_components(node_id).await;
-        let default_catalog = catalog_manager
-            .catalog(DEFAULT_CATALOG_NAME)
-            .unwrap()
-            .unwrap();
+        let components = prepare_components(node_id).await;
        assert_eq!(
            vec![DEFAULT_SCHEMA_NAME.to_string()],
-            default_catalog.schema_names().unwrap()
+            components
+                .catalog_manager
+                .schema_names(DEFAULT_CATALOG_NAME)
+                .await
+                .unwrap()
        );

-        let default_schema = default_catalog
-            .schema(DEFAULT_SCHEMA_NAME)
-            .unwrap()
-            .unwrap();
-        assert_eq!(vec!["numbers"], default_schema.table_names().unwrap());
-
        // register a new table with an nonexistent catalog
        let catalog_name = DEFAULT_CATALOG_NAME.to_string();
        let schema_name = DEFAULT_SCHEMA_NAME.to_string();
@@ -177,7 +256,8 @@ mod tests {
        let table_id = 1;
        // this schema has no effect
        let table_schema = RawSchema::new(vec![]);
-        let table = table_engine
+        let table = components
+            .table_engine()
            .create_table(
                &EngineContext {},
                CreateTableRequest {
@@ -191,6 +271,7 @@ mod tests {
                    primary_key_indices: vec![],
                    create_if_not_exists: false,
                    table_options: Default::default(),
+                    engine: MITO_ENGINE.to_string(),
                },
            )
            .await
@@ -202,42 +283,51 @@ mod tests {
            table_id,
            table,
        };
-        assert!(catalog_manager.register_table(reg_req).await.unwrap());
+        assert!(components
+            .catalog_manager
+            .register_table(reg_req)
+            .await
+            .unwrap());
        assert_eq!(
-            HashSet::from([table_name, "numbers".to_string()]),
-            default_schema
-                .table_names()
+            vec![table_name],
+            components
+                .catalog_manager
+                .table_names(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
+                .await
                .unwrap()
-                .into_iter()
-                .collect::<HashSet<_>>()
        );
    }

    #[tokio::test]
    async fn test_register_catalog_schema_table() {
        let node_id = 42;
-        let (backend, table_engine, catalog_manager) = prepare_components(node_id).await;
+        let components = prepare_components(node_id).await;

        let catalog_name = "test_catalog".to_string();
        let schema_name = "nonexistent_schema".to_string();
-        let catalog = Arc::new(RemoteCatalogProvider::new(
-            catalog_name.clone(),
-            backend.clone(),
-            node_id,
-        ));

        // register catalog to catalog manager
-        catalog_manager
-            .register_catalog(catalog_name.clone(), catalog)
-            .unwrap();
+        assert!(components
+            .catalog_manager
+            .register_catalog(catalog_name.clone())
+            .await
+            .is_ok());
        assert_eq!(
            HashSet::<String>::from_iter(
                vec![DEFAULT_CATALOG_NAME.to_string(), catalog_name.clone()].into_iter()
            ),
-            HashSet::from_iter(catalog_manager.catalog_names().unwrap().into_iter())
+            HashSet::from_iter(
+                components
+                    .catalog_manager
+                    .catalog_names()
+                    .await
+                    .unwrap()
+                    .into_iter()
+            )
        );

-        let table_to_register = table_engine
+        let table_to_register = components
+            .table_engine()
            .create_table(
                &EngineContext {},
                CreateTableRequest {
@@ -251,6 +341,7 @@ mod tests {
                    primary_key_indices: vec![],
                    create_if_not_exists: false,
                    table_options: Default::default(),
+                    engine: MITO_ENGINE.to_string(),
                },
            )
            .await
@@ -265,33 +356,128 @@ mod tests {
        };
        // this register will fail since schema does not exist yet
        assert_matches!(
-            catalog_manager
+            components
+                .catalog_manager
                .register_table(reg_req.clone())
                .await
                .unwrap_err(),
            catalog::error::Error::SchemaNotFound { .. }
        );

-        let new_catalog = catalog_manager
-            .catalog(&catalog_name)
-            .unwrap()
-            .expect("catalog should exist since it's already registered");
-        let schema = Arc::new(RemoteSchemaProvider::new(
-            catalog_name.clone(),
-            schema_name.clone(),
-            node_id,
-            backend.clone(),
-        ));
-
-        let prev = new_catalog
-            .register_schema(schema_name.clone(), schema.clone())
-            .expect("Register schema should not fail");
-        assert!(prev.is_none());
-        assert!(catalog_manager.register_table(reg_req).await.unwrap());
+        let register_schema_request = RegisterSchemaRequest {
+            catalog: catalog_name.to_string(),
+            schema: schema_name.to_string(),
+        };
+        assert!(components
+            .catalog_manager
+            .register_schema(register_schema_request)
+            .await
+            .expect("Register schema should not fail"));
+        assert!(components
+            .catalog_manager
+            .register_table(reg_req)
+            .await
+            .unwrap());

        assert_eq!(
            HashSet::from([schema_name.clone()]),
-            new_catalog.schema_names().unwrap().into_iter().collect()
+            components
+                .catalog_manager
+                .schema_names(&catalog_name)
+                .await
+                .unwrap()
+                .into_iter()
+                .collect()
        )
    }
+
+    #[tokio::test]
+    async fn test_register_table_before_and_after_region_alive_keeper_started() {
+        let components = prepare_components(42).await;
+        let catalog_manager = &components.catalog_manager;
+        let region_alive_keepers = &components.region_alive_keepers;
+
+        let table_before = TableIdent {
+            catalog: DEFAULT_CATALOG_NAME.to_string(),
+            schema: DEFAULT_SCHEMA_NAME.to_string(),
+            table: "table_before".to_string(),
+            table_id: 1,
+            engine: MITO_ENGINE.to_string(),
+        };
+        let request = RegisterTableRequest {
+            catalog: table_before.catalog.clone(),
+            schema: table_before.schema.clone(),
+            table_name: table_before.table.clone(),
+            table_id: table_before.table_id,
+            table: Arc::new(EmptyTable::new(CreateTableRequest {
+                id: table_before.table_id,
+                catalog_name: table_before.catalog.clone(),
+                schema_name: table_before.schema.clone(),
+                table_name: table_before.table.clone(),
+                desc: None,
+                schema: RawSchema::new(vec![]),
+                region_numbers: vec![0],
+                primary_key_indices: vec![],
+                create_if_not_exists: false,
+                table_options: Default::default(),
+                engine: MITO_ENGINE.to_string(),
+            })),
+        };
+        assert!(catalog_manager.register_table(request).await.unwrap());
+
+        let keeper = region_alive_keepers
+            .find_keeper(&table_before)
+            .await
+            .unwrap();
+        let deadline = keeper.deadline(0).await.unwrap();
+        let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 29);
+        // assert region alive countdown is not started
+        assert!(deadline > far_future);
+
+        region_alive_keepers.start().await;
+
+        let table_after = TableIdent {
+            catalog: DEFAULT_CATALOG_NAME.to_string(),
+            schema: DEFAULT_SCHEMA_NAME.to_string(),
+            table: "table_after".to_string(),
+            table_id: 2,
+            engine: MITO_ENGINE.to_string(),
+        };
+        let request = RegisterTableRequest {
+            catalog: table_after.catalog.clone(),
+            schema: table_after.schema.clone(),
+            table_name: table_after.table.clone(),
+            table_id: table_after.table_id,
+            table: Arc::new(EmptyTable::new(CreateTableRequest {
+                id: table_after.table_id,
+                catalog_name: table_after.catalog.clone(),
+                schema_name: table_after.schema.clone(),
+                table_name: table_after.table.clone(),
+                desc: None,
+                schema: RawSchema::new(vec![]),
+                region_numbers: vec![0],
+                primary_key_indices: vec![],
+                create_if_not_exists: false,
+                table_options: Default::default(),
+                engine: MITO_ENGINE.to_string(),
+            })),
+        };
+        assert!(catalog_manager.register_table(request).await.unwrap());
+
+        let keeper = region_alive_keepers
+            .find_keeper(&table_after)
+            .await
+            .unwrap();
+        let deadline = keeper.deadline(0).await.unwrap();
+        // assert countdown is started for the table registered after [RegionAliveKeepers] started
+        assert!(deadline <= Instant::now() + Duration::from_secs(20));
+
+        let keeper = region_alive_keepers
+            .find_keeper(&table_before)
+            .await
+            .unwrap();
+        let deadline = keeper.deadline(0).await.unwrap();
+        // assert countdown is started for the table registered before [RegionAliveKeepers] started, too
+        assert!(deadline <= Instant::now() + Duration::from_secs(20));
+    }
 }
--- a/src/client/Cargo.toml
+++ b/src/client/Cargo.toml
@@ -4,6 +4,9 @@ version.workspace = true
 edition.workspace = true
 license.workspace = true

+[features]
+testing = []
+
 [dependencies]
 api = { path = "../api" }
 arrow-flight.workspace = true
@@ -12,29 +15,33 @@ common-base = { path = "../common/base" }
 common-catalog = { path = "../common/catalog" }
 common-error = { path = "../common/error" }
 common-grpc = { path = "../common/grpc" }
-common-grpc-expr = { path = "../common/grpc-expr" }
+common-meta = { path = "../common/meta" }
 common-query = { path = "../common/query" }
 common-recordbatch = { path = "../common/recordbatch" }
-common-time = { path = "../common/time" }
 common-telemetry = { path = "../common/telemetry" }
+common-time = { path = "../common/time" }
 datafusion.workspace = true
 datatypes = { path = "../datatypes" }
 enum_dispatch = "0.3"
 futures-util.workspace = true
+moka = { version = "0.9", features = ["future"] }
 parking_lot = "0.12"
 prost.workspace = true
-rand = "0.8"
+rand.workspace = true
 snafu.workspace = true
+tokio-stream = { version = "0.1", features = ["net"] }
+tokio.workspace = true
 tonic.workspace = true

 [dev-dependencies]
+common-grpc-expr = { path = "../common/grpc-expr" }
 datanode = { path = "../datanode" }
+derive-new = "0.5"
+prost.workspace = true
 substrait = { path = "../common/substrait" }
-tokio.workspace = true
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
-prost.workspace = true

 [dev-dependencies.substrait_proto]
 package = "substrait"
-version = "0.4"
+version = "0.7"
--- a/src/client/examples/logical.rs
+++ b/src/client/examples/logical.rs
@@ -14,7 +14,7 @@

 use api::v1::{ColumnDataType, ColumnDef, CreateTableExpr, TableId};
 use client::{Client, Database};
-use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO_ENGINE};
 use prost::Message;
 use substrait_proto::proto::plan_rel::RelType as PlanRelType;
 use substrait_proto::proto::read_rel::{NamedTable, ReadType};
@@ -63,7 +63,8 @@ async fn run() {
        create_if_not_exists: false,
        table_options: Default::default(),
        table_id: Some(TableId { id: 1024 }),
-        region_ids: vec![0],
+        region_numbers: vec![0],
+        engine: MITO_ENGINE.to_string(),
    };

    let db = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
@@ -72,7 +73,7 @@ async fn run() {

    let logical = mock_logical_plan();
    event!(Level::INFO, "plan size: {:#?}", logical.len());
-    let result = db.logical_plan(logical).await.unwrap();
+    let result = db.logical_plan(logical, None).await.unwrap();

    event!(Level::INFO, "result: {:#?}", result);
 }
--- a/src/client/examples/stream_ingest.rs
+++ b/src/client/examples/stream_ingest.rs
@@ -0,0 +1,182 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use api::v1::*;
+use client::{Client, Database, DEFAULT_SCHEMA_NAME};
+use derive_new::new;
+use tracing::{error, info};
+
+fn main() {
+    tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
+        .unwrap();
+
+    run();
+}
+
+#[tokio::main]
+async fn run() {
+    let greptimedb_endpoint =
+        std::env::var("GREPTIMEDB_ENDPOINT").unwrap_or_else(|_| "localhost:4001".to_owned());
+
+    let greptimedb_dbname =
+        std::env::var("GREPTIMEDB_DBNAME").unwrap_or_else(|_| DEFAULT_SCHEMA_NAME.to_owned());
+
+    let grpc_client = Client::with_urls(vec![&greptimedb_endpoint]);
+
+    let client = Database::new_with_dbname(greptimedb_dbname, grpc_client);
+
+    let stream_inserter = client.streaming_inserter().unwrap();
+
+    if let Err(e) = stream_inserter
+        .insert(vec![to_insert_request(weather_records_1())])
+        .await
+    {
+        error!("Error: {e}");
+    }
+
+    if let Err(e) = stream_inserter
+        .insert(vec![to_insert_request(weather_records_2())])
+        .await
+    {
+        error!("Error: {e}");
+    }
+
+    let result = stream_inserter.finish().await;
+
+    match result {
+        Ok(rows) => {
+            info!("Rows written: {rows}");
+        }
+        Err(e) => {
+            error!("Error: {e}");
+        }
+    };
+}
+
+#[derive(new)]
+struct WeatherRecord {
+    timestamp_millis: i64,
+    collector: String,
+    temperature: f32,
+    humidity: i32,
+}
+
+fn weather_records_1() -> Vec<WeatherRecord> {
+    vec![
+        WeatherRecord::new(1686109527000, "c1".to_owned(), 26.4, 15),
+        WeatherRecord::new(1686023127000, "c1".to_owned(), 29.3, 20),
+        WeatherRecord::new(1685936727000, "c1".to_owned(), 31.8, 13),
+        WeatherRecord::new(1686109527000, "c2".to_owned(), 20.4, 67),
+        WeatherRecord::new(1686023127000, "c2".to_owned(), 18.0, 74),
+        WeatherRecord::new(1685936727000, "c2".to_owned(), 19.2, 81),
+    ]
+}
+
+fn weather_records_2() -> Vec<WeatherRecord> {
+    vec![
+        WeatherRecord::new(1686109527001, "c3".to_owned(), 26.4, 15),
+        WeatherRecord::new(1686023127002, "c3".to_owned(), 29.3, 20),
+        WeatherRecord::new(1685936727003, "c3".to_owned(), 31.8, 13),
+        WeatherRecord::new(1686109527004, "c4".to_owned(), 20.4, 67),
+        WeatherRecord::new(1686023127005, "c4".to_owned(), 18.0, 74),
+        WeatherRecord::new(1685936727006, "c4".to_owned(), 19.2, 81),
+    ]
+}
+
+/// This function transposes the given weather records into columns and
+/// bundles them into an `InsertRequest`.
+///
+/// Data structure:
+///
+/// - `ts`: a timestamp column
+/// - `collector`: a tag column
+/// - `temperature`: a value field of f32
+/// - `humidity`: a value field of i32
+///
+fn to_insert_request(records: Vec<WeatherRecord>) -> InsertRequest {
+    // number of rows to insert, one per record
+    let rows = records.len();
+
+    // transpose records into columns
+    let (timestamp_millis, collectors, temp, humidity) = records.into_iter().fold(
+        (
+            Vec::with_capacity(rows),
+            Vec::with_capacity(rows),
+            Vec::with_capacity(rows),
+            Vec::with_capacity(rows),
+        ),
+        |mut acc, rec| {
+            acc.0.push(rec.timestamp_millis);
+            acc.1.push(rec.collector);
+            acc.2.push(rec.temperature);
+            acc.3.push(rec.humidity);
+
+            acc
+        },
+    );
+
+    let columns = vec![
+        // timestamp column: `ts`
+        Column {
+            column_name: "ts".to_owned(),
+            values: Some(column::Values {
+                ts_millisecond_values: timestamp_millis,
+                ..Default::default()
+            }),
+            semantic_type: SemanticType::Timestamp as i32,
+            datatype: ColumnDataType::TimestampMillisecond as i32,
+            ..Default::default()
+        },
+        // tag column: collectors
+        Column {
+            column_name: "collector".to_owned(),
+            values: Some(column::Values {
+                string_values: collectors.into_iter().collect(),
+                ..Default::default()
+            }),
+            semantic_type: SemanticType::Tag as i32,
+            datatype: ColumnDataType::String as i32,
+            ..Default::default()
+        },
+        // field column: temperature
+        Column {
+            column_name: "temperature".to_owned(),
+            values: Some(column::Values {
+                f32_values: temp,
+                ..Default::default()
+            }),
+            semantic_type: SemanticType::Field as i32,
+            datatype: ColumnDataType::Float32 as i32,
+            ..Default::default()
+        },
+        // field column: humidity
+        Column {
+            column_name: "humidity".to_owned(),
+            values: Some(column::Values {
+                i32_values: humidity,
+                ..Default::default()
+            }),
+            semantic_type: SemanticType::Field as i32,
+            datatype: ColumnDataType::Int32 as i32,
+            ..Default::default()
+        },
+    ];
+
+    InsertRequest {
+        table_name: "weather_demo".to_owned(),
+        columns,
+        row_count: rows as u32,
+        ..Default::default()
+    }
+}
--- a/src/client/src/client.rs
+++ b/src/client/src/client.rs
@@ -14,6 +14,10 @@

 use std::sync::Arc;

+use api::v1::greptime_database_client::GreptimeDatabaseClient;
+use api::v1::health_check_client::HealthCheckClient;
+use api::v1::prometheus_gateway_client::PrometheusGatewayClient;
+use api::v1::HealthCheckRequest;
 use arrow_flight::flight_service_client::FlightServiceClient;
 use common_grpc::channel_manager::ChannelManager;
 use parking_lot::RwLock;
@@ -23,6 +27,10 @@ use tonic::transport::Channel;
 use crate::load_balance::{LoadBalance, Loadbalancer};
 use crate::{error, Result};

+pub(crate) struct DatabaseClient {
+    pub(crate) inner: GreptimeDatabaseClient<Channel>,
+}
+
 pub(crate) struct FlightClient {
    addr: String,
    client: FlightServiceClient<Channel>,
@@ -118,7 +126,7 @@ impl Client {
        self.inner.set_peers(urls);
    }

-    pub(crate) fn make_client(&self) -> Result<FlightClient> {
+    fn find_channel(&self) -> Result<(String, Channel)> {
        let addr = self
            .inner
            .get_peer()
@@ -131,11 +139,35 @@ impl Client {
            .channel_manager
            .get(&addr)
            .context(error::CreateChannelSnafu { addr: &addr })?;
+        Ok((addr, channel))
+    }
+
+    pub(crate) fn make_flight_client(&self) -> Result<FlightClient> {
+        let (addr, channel) = self.find_channel()?;
        Ok(FlightClient {
            addr,
            client: FlightServiceClient::new(channel),
        })
    }
+
+    pub(crate) fn make_database_client(&self) -> Result<DatabaseClient> {
+        let (_, channel) = self.find_channel()?;
+        Ok(DatabaseClient {
+            inner: GreptimeDatabaseClient::new(channel),
+        })
+    }
+
+    pub fn make_prometheus_gateway_client(&self) -> Result<PrometheusGatewayClient<Channel>> {
+        let (_, channel) = self.find_channel()?;
+        Ok(PrometheusGatewayClient::new(channel))
+    }
+
+    pub async fn health_check(&self) -> Result<()> {
+        let (_, channel) = self.find_channel()?;
+        let mut client = HealthCheckClient::new(channel);
+        let _ = client.health_check(HealthCheckRequest {}).await?;
+        Ok(())
+    }
 }

 #[cfg(test)]
--- a/src/client/src/client_manager.rs
+++ b/src/client/src/client_manager.rs
@@ -12,13 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::fmt::{Debug, Formatter};
 use std::time::Duration;

-use client::Client;
-use common_grpc::channel_manager::ChannelManager;
-use meta_client::rpc::Peer;
+use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
+use common_meta::peer::Peer;
 use moka::future::{Cache, CacheBuilder};

+use crate::Client;
+
 pub struct DatanodeClients {
    channel_manager: ChannelManager,
    clients: Cache<Peer, Client>,
@@ -26,18 +28,30 @@ pub struct DatanodeClients {

 impl Default for DatanodeClients {
    fn default() -> Self {
+        Self::new(ChannelConfig::new())
+    }
+}
+
+impl Debug for DatanodeClients {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("DatanodeClients")
+            .field("channel_manager", &self.channel_manager)
+            .finish()
+    }
+}
+
+impl DatanodeClients {
+    pub fn new(config: ChannelConfig) -> Self {
        Self {
-            channel_manager: ChannelManager::new(),
+            channel_manager: ChannelManager::with_config(config),
            clients: CacheBuilder::new(1024)
                .time_to_live(Duration::from_secs(30 * 60))
                .time_to_idle(Duration::from_secs(5 * 60))
                .build(),
        }
    }
-}

-impl DatanodeClients {
-    pub(crate) async fn get_client(&self, datanode: &Peer) -> Client {
+    pub async fn get_client(&self, datanode: &Peer) -> Client {
        self.clients
            .get_with_by_ref(datanode, async move {
                Client::with_manager_and_urls(
@@ -48,8 +62,8 @@ impl DatanodeClients {
            .await
    }

-    #[cfg(test)]
-    pub(crate) async fn insert_client(&self, datanode: Peer, client: Client) {
+    #[cfg(feature = "testing")]
+    pub async fn insert_client(&self, datanode: Peer, client: Client) {
        self.clients.insert(datanode, client).await
    }
 }
--- a/src/client/src/database.rs
+++ b/src/client/src/database.rs
@@ -12,45 +12,66 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::str::FromStr;
-
 use api::v1::auth_header::AuthScheme;
 use api::v1::ddl_request::Expr as DdlExpr;
 use api::v1::greptime_request::Request;
 use api::v1::query_request::Query;
 use api::v1::{
-    AlterExpr, AuthHeader, CreateTableExpr, DdlRequest, DropTableExpr, GreptimeRequest,
-    InsertRequest, PromRangeQuery, QueryRequest, RequestHeader,
+    AlterExpr, AuthHeader, CompactTableExpr, CreateTableExpr, DdlRequest, DeleteRequest,
+    DropTableExpr, FlushTableExpr, GreptimeRequest, InsertRequests, PromRangeQuery, QueryRequest,
+    RequestHeader, TruncateTableExpr,
 };
 use arrow_flight::{FlightData, Ticket};
-use common_error::prelude::*;
+use common_error::ext::{BoxedError, ErrorExt};
 use common_grpc::flight::{flight_messages_to_recordbatches, FlightDecoder, FlightMessage};
 use common_query::Output;
-use common_telemetry::logging;
+use common_telemetry::{logging, timer};
 use futures_util::{TryFutureExt, TryStreamExt};
 use prost::Message;
 use snafu::{ensure, ResultExt};

-use crate::error::{ConvertFlightDataSnafu, IllegalFlightMessagesSnafu};
-use crate::{error, Client, Result};
+use crate::error::{ConvertFlightDataSnafu, IllegalFlightMessagesSnafu, ServerSnafu};
+use crate::{error, from_grpc_response, metrics, Client, Result, StreamInserter};

-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
 pub struct Database {
    // The "catalog" and "schema" to be used in processing the requests at the server side.
    // They are the "hint" or "context", just like how the "database" in "USE" statement is treated in MySQL.
    // They will be carried in the request header.
    catalog: String,
    schema: String,
+    // The dbname follows the same naming rule as our mysql, postgres and http
+    // protocols. The server treats dbname in priority over catalog/schema.
+    dbname: String,

    client: Client,
    ctx: FlightContext,
 }

 impl Database {
+    /// Create database service client using catalog and schema
    pub fn new(catalog: impl Into<String>, schema: impl Into<String>, client: Client) -> Self {
        Self {
            catalog: catalog.into(),
            schema: schema.into(),
+            dbname: "".to_string(),
+            client,
+            ctx: FlightContext::default(),
+        }
+    }
+
+    /// Create database service client using dbname.
+    ///
+    /// This API is designed for external usage. `dbname` is:
+    ///
+    /// - the name of database when using GreptimeDB standalone or cluster
+    /// - the name provided by GreptimeCloud or other multi-tenant GreptimeDB
+    ///   environment
+    pub fn new_with_dbname(dbname: impl Into<String>, client: Client) -> Self {
+        Self {
+            catalog: "".to_string(),
+            schema: "".to_string(),
+            dbname: dbname.into(),
            client,
            ctx: FlightContext::default(),
        }
@@ -72,27 +93,95 @@ impl Database {
        self.schema = schema.into();
    }

+    pub fn dbname(&self) -> &String {
+        &self.dbname
+    }
+
+    pub fn set_dbname(&mut self, dbname: impl Into<String>) {
+        self.dbname = dbname.into();
+    }
+
    pub fn set_auth(&mut self, auth: AuthScheme) {
        self.ctx.auth_header = Some(AuthHeader {
            auth_scheme: Some(auth),
        });
    }

-    pub async fn insert(&self, request: InsertRequest) -> Result<Output> {
-        self.do_get(Request::Insert(request)).await
+    pub async fn insert(&self, requests: InsertRequests) -> Result<u32> {
+        let _timer = timer!(metrics::METRIC_GRPC_INSERT);
+        self.handle(Request::Inserts(requests)).await
+    }
+
+    pub fn streaming_inserter(&self) -> Result<StreamInserter> {
+        self.streaming_inserter_with_channel_size(65536)
+    }
+
+    pub fn streaming_inserter_with_channel_size(
+        &self,
+        channel_size: usize,
+    ) -> Result<StreamInserter> {
+        let client = self.client.make_database_client()?.inner;
+
+        let stream_inserter = StreamInserter::new(
+            client,
+            self.dbname().to_string(),
+            self.ctx.auth_header.clone(),
+            channel_size,
+        );
+
+        Ok(stream_inserter)
+    }
+
+    pub async fn delete(&self, request: DeleteRequest) -> Result<u32> {
+        let _timer = timer!(metrics::METRIC_GRPC_DELETE);
+        self.handle(Request::Delete(request)).await
+    }
+
+    async fn handle(&self, request: Request) -> Result<u32> {
+        let mut client = self.client.make_database_client()?.inner;
+        let request = self.to_rpc_request(request, None);
+        let response = client.handle(request).await?.into_inner();
+        from_grpc_response(response)
+    }
+
+    #[inline]
+    fn to_rpc_request(&self, request: Request, trace_id: Option<u64>) -> GreptimeRequest {
+        GreptimeRequest {
+            header: Some(RequestHeader {
+                catalog: self.catalog.clone(),
+                schema: self.schema.clone(),
+                authorization: self.ctx.auth_header.clone(),
+                dbname: self.dbname.clone(),
+                trace_id,
+                span_id: None,
+            }),
+            request: Some(request),
+        }
    }

    pub async fn sql(&self, sql: &str) -> Result<Output> {
-        self.do_get(Request::Query(QueryRequest {
-            query: Some(Query::Sql(sql.to_string())),
-        }))
+        let _timer = timer!(metrics::METRIC_GRPC_SQL);
+        self.do_get(
+            Request::Query(QueryRequest {
+                query: Some(Query::Sql(sql.to_string())),
+            }),
+            None,
+        )
        .await
    }

-    pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<Output> {
-        self.do_get(Request::Query(QueryRequest {
-            query: Some(Query::LogicalPlan(logical_plan)),
-        }))
+    pub async fn logical_plan(
+        &self,
+        logical_plan: Vec<u8>,
+        trace_id: Option<u64>,
+    ) -> Result<Output> {
+        let _timer = timer!(metrics::METRIC_GRPC_LOGICAL_PLAN);
+        self.do_get(
+            Request::Query(QueryRequest {
+                query: Some(Query::LogicalPlan(logical_plan)),
+            }),
+            trace_id,
+        )
        .await
    }

@@ -103,76 +192,119 @@ impl Database {
        end: &str,
        step: &str,
    ) -> Result<Output> {
-        self.do_get(Request::Query(QueryRequest {
-            query: Some(Query::PromRangeQuery(PromRangeQuery {
-                query: promql.to_string(),
-                start: start.to_string(),
-                end: end.to_string(),
-                step: step.to_string(),
-            })),
-        }))
+        let _timer = timer!(metrics::METRIC_GRPC_PROMQL_RANGE_QUERY);
+        self.do_get(
+            Request::Query(QueryRequest {
+                query: Some(Query::PromRangeQuery(PromRangeQuery {
+                    query: promql.to_string(),
+                    start: start.to_string(),
+                    end: end.to_string(),
+                    step: step.to_string(),
+                })),
+            }),
+            None,
+        )
        .await
    }

    pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> {
-        self.do_get(Request::Ddl(DdlRequest {
-            expr: Some(DdlExpr::CreateTable(expr)),
-        }))
+        let _timer = timer!(metrics::METRIC_GRPC_CREATE_TABLE);
+        self.do_get(
+            Request::Ddl(DdlRequest {
+                expr: Some(DdlExpr::CreateTable(expr)),
+            }),
+            None,
+        )
        .await
    }

    pub async fn alter(&self, expr: AlterExpr) -> Result<Output> {
-        self.do_get(Request::Ddl(DdlRequest {
-            expr: Some(DdlExpr::Alter(expr)),
-        }))
+        let _timer = timer!(metrics::METRIC_GRPC_ALTER);
+        self.do_get(
+            Request::Ddl(DdlRequest {
+                expr: Some(DdlExpr::Alter(expr)),
+            }),
+            None,
+        )
        .await
    }

    pub async fn drop_table(&self, expr: DropTableExpr) -> Result<Output> {
-        self.do_get(Request::Ddl(DdlRequest {
-            expr: Some(DdlExpr::DropTable(expr)),
-        }))
+        let _timer = timer!(metrics::METRIC_GRPC_DROP_TABLE);
+        self.do_get(
+            Request::Ddl(DdlRequest {
+                expr: Some(DdlExpr::DropTable(expr)),
+            }),
+            None,
+        )
        .await
    }

-    async fn do_get(&self, request: Request) -> Result<Output> {
-        let request = GreptimeRequest {
-            header: Some(RequestHeader {
-                catalog: self.catalog.clone(),
-                schema: self.schema.clone(),
-                authorization: self.ctx.auth_header.clone(),
+    pub async fn flush_table(&self, expr: FlushTableExpr) -> Result<Output> {
+        let _timer = timer!(metrics::METRIC_GRPC_FLUSH_TABLE);
+        self.do_get(
+            Request::Ddl(DdlRequest {
+                expr: Some(DdlExpr::FlushTable(expr)),
            }),
-            request: Some(request),
-        };
+            None,
+        )
+        .await
+    }
+
+    pub async fn compact_table(&self, expr: CompactTableExpr) -> Result<Output> {
+        let _timer = timer!(metrics::METRIC_GRPC_COMPACT_TABLE);
+        self.do_get(
+            Request::Ddl(DdlRequest {
+                expr: Some(DdlExpr::CompactTable(expr)),
+            }),
+            None,
+        )
+        .await
+    }
+
+    pub async fn truncate_table(&self, expr: TruncateTableExpr) -> Result<Output> {
+        let _timer = timer!(metrics::METRIC_GRPC_TRUNCATE_TABLE);
+        self.do_get(
+            Request::Ddl(DdlRequest {
+                expr: Some(DdlExpr::TruncateTable(expr)),
+            }),
+            None,
+        )
+        .await
+    }
+
+    async fn do_get(&self, request: Request, trace_id: Option<u64>) -> Result<Output> {
+        // FIXME(paomian): some labels should be added for metrics
+        let _timer = timer!(metrics::METRIC_GRPC_DO_GET);
+        let request = self.to_rpc_request(request, trace_id);
        let request = Ticket {
            ticket: request.encode_to_vec().into(),
        };

-        let mut client = self.client.make_client()?;
+        let mut client = self.client.make_flight_client()?;

-        // TODO(LFC): Streaming get flight data.
        let flight_data: Vec<FlightData> = client
            .mut_inner()
            .do_get(request)
            .and_then(|response| response.into_inner().try_collect())
            .await
            .map_err(|e| {
-                let code = get_metadata_value(&e, INNER_ERROR_CODE)
-                    .and_then(|s| StatusCode::from_str(&s).ok())
-                    .unwrap_or(StatusCode::Unknown);
-                let msg = get_metadata_value(&e, INNER_ERROR_MSG).unwrap_or(e.to_string());
-                error::ExternalSnafu { code, msg }
+                let tonic_code = e.code();
+                let e: error::Error = e.into();
+                let code = e.status_code();
+                let msg = e.to_string();
+                ServerSnafu { code, msg }
                    .fail::<()>()
                    .map_err(BoxedError::new)
                    .context(error::FlightGetSnafu {
-                        tonic_code: e.code(),
+                        tonic_code,
                        addr: client.addr(),
                    })
                    .map_err(|error| {
                        logging::error!(
                            "Failed to do Flight get, addr: {}, code: {}, source: {}",
                            client.addr(),
-                            e.code(),
+                            tonic_code,
                            error
                        );
                        error
@@ -203,12 +335,6 @@ impl Database {
    }
 }

-fn get_metadata_value(e: &tonic::Status, key: &str) -> Option<String> {
-    e.metadata()
-        .get(key)
-        .and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
-}
-
 #[derive(Default, Debug, Clone)]
 pub struct FlightContext {
    auth_header: Option<AuthHeader>,
--- a/src/client/src/error.rs
+++ b/src/client/src/error.rs
@@ -13,18 +13,19 @@
 // limitations under the License.

 use std::any::Any;
+use std::str::FromStr;

-use common_error::prelude::*;
-use tonic::Code;
+use common_error::ext::{BoxedError, ErrorExt};
+use common_error::status_code::StatusCode;
+use common_error::{INNER_ERROR_CODE, INNER_ERROR_MSG};
+use snafu::{Location, Snafu};
+use tonic::{Code, Status};

 #[derive(Debug, Snafu)]
 #[snafu(visibility(pub))]
 pub enum Error {
    #[snafu(display("Illegal Flight messages, reason: {}", reason))]
-    IllegalFlightMessages {
-        reason: String,
-        backtrace: Backtrace,
-    },
+    IllegalFlightMessages { reason: String, location: Location },

    #[snafu(display("Failed to do Flight get, code: {}, source: {}", tonic_code, source))]
    FlightGet {
@@ -35,24 +36,21 @@ pub enum Error {

    #[snafu(display("Failed to convert FlightData, source: {}", source))]
    ConvertFlightData {
-        #[snafu(backtrace)]
+        location: Location,
        source: common_grpc::Error,
    },

    #[snafu(display("Column datatype error, source: {}", source))]
    ColumnDataType {
-        #[snafu(backtrace)]
+        location: Location,
        source: api::error::Error,
    },

    #[snafu(display("Illegal GRPC client state: {}", err_msg))]
-    IllegalGrpcClientState {
-        err_msg: String,
-        backtrace: Backtrace,
-    },
+    IllegalGrpcClientState { err_msg: String, location: Location },

    #[snafu(display("Missing required field in protobuf, field: {}", field))]
-    MissingField { field: String, backtrace: Backtrace },
+    MissingField { field: String, location: Location },

    #[snafu(display(
        "Failed to create gRPC channel, peer address: {}, source: {}",
@@ -61,13 +59,19 @@ pub enum Error {
    ))]
    CreateChannel {
        addr: String,
-        #[snafu(backtrace)]
+        location: Location,
        source: common_grpc::error::Error,
    },

-    /// Error deserialized from gRPC metadata
+    // Server error carried in Tonic Status's metadata.
    #[snafu(display("{}", msg))]
-    ExternalError { code: StatusCode, msg: String },
+    Server { code: StatusCode, msg: String },
+
+    #[snafu(display("Illegal Database response: {err_msg}"))]
+    IllegalDatabaseResponse { err_msg: String },
+
+    #[snafu(display("Failed to send request with streaming: {}", err_msg))]
+    ClientStreaming { err_msg: String, location: Location },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -77,21 +81,38 @@ impl ErrorExt for Error {
        match self {
            Error::IllegalFlightMessages { .. }
            | Error::ColumnDataType { .. }
-            | Error::MissingField { .. } => StatusCode::Internal,
+            | Error::MissingField { .. }
+            | Error::IllegalDatabaseResponse { .. }
+            | Error::ClientStreaming { .. } => StatusCode::Internal,
+
+            Error::Server { code, .. } => *code,
            Error::FlightGet { source, .. } => source.status_code(),
-            Error::CreateChannel { source, .. } | Error::ConvertFlightData { source } => {
+            Error::CreateChannel { source, .. } | Error::ConvertFlightData { source, .. } => {
                source.status_code()
            }
            Error::IllegalGrpcClientState { .. } => StatusCode::Unexpected,
-            Error::ExternalError { code, .. } => *code,
        }
    }

-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
    fn as_any(&self) -> &dyn Any {
        self
    }
 }
+
+impl From<Status> for Error {
+    fn from(e: Status) -> Self {
+        fn get_metadata_value(e: &Status, key: &str) -> Option<String> {
+            e.metadata()
+                .get(key)
+                .and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
+        }
+
+        let code = get_metadata_value(&e, INNER_ERROR_CODE)
+            .and_then(|s| StatusCode::from_str(&s).ok())
+            .unwrap_or(StatusCode::Unknown);
+
+        let msg = get_metadata_value(&e, INNER_ERROR_MSG).unwrap_or(e.to_string());
+
+        Self::Server { code, msg }
+    }
+}
--- a/src/client/src/lib.rs
+++ b/src/client/src/lib.rs
@@ -13,13 +13,50 @@
 // limitations under the License.

 mod client;
+pub mod client_manager;
 mod database;
-mod error;
+pub mod error;
 pub mod load_balance;
+mod metrics;
+mod stream_insert;

 pub use api;
+use api::v1::greptime_response::Response;
+use api::v1::{AffectedRows, GreptimeResponse};
 pub use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+use common_error::status_code::StatusCode;
+use snafu::OptionExt;

 pub use self::client::Client;
 pub use self::database::Database;
 pub use self::error::{Error, Result};
+pub use self::stream_insert::StreamInserter;
+use crate::error::{IllegalDatabaseResponseSnafu, ServerSnafu};
+
+pub fn from_grpc_response(response: GreptimeResponse) -> Result<u32> {
+    let header = response.header.context(IllegalDatabaseResponseSnafu {
+        err_msg: "missing header",
+    })?;
+    let status = header.status.context(IllegalDatabaseResponseSnafu {
+        err_msg: "missing status",
+    })?;
+
+    if StatusCode::is_success(status.status_code) {
+        let res = response.response.context(IllegalDatabaseResponseSnafu {
+            err_msg: "missing response",
+        })?;
+        match res {
+            Response::AffectedRows(AffectedRows { value }) => Ok(value),
+        }
+    } else {
+        let status_code =
+            StatusCode::from_u32(status.status_code).context(IllegalDatabaseResponseSnafu {
+                err_msg: format!("invalid status: {:?}", status),
+            })?;
+        ServerSnafu {
+            code: status_code,
+            msg: status.err_msg,
+        }
+        .fail()
+    }
+}
--- a/src/client/src/load_balance.rs
+++ b/src/client/src/load_balance.rs
@@ -60,7 +60,7 @@ mod tests {
        let random = Random;
        for _ in 0..100 {
            let peer = random.get_peer(&peers).unwrap();
-            all.contains(peer);
+            assert!(all.contains(peer));
        }
    }
 }
--- a/src/client/src/metrics.rs
+++ b/src/client/src/metrics.rs
@@ -0,0 +1,27 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! client metrics
+pub const METRIC_GRPC_CREATE_TABLE: &str = "grpc.create_table";
+pub const METRIC_GRPC_PROMQL_RANGE_QUERY: &str = "grpc.promql.range_query";
+pub const METRIC_GRPC_INSERT: &str = "grpc.insert";
+pub const METRIC_GRPC_DELETE: &str = "grpc.delete";
+pub const METRIC_GRPC_SQL: &str = "grpc.sql";
+pub const METRIC_GRPC_LOGICAL_PLAN: &str = "grpc.logical_plan";
+pub const METRIC_GRPC_ALTER: &str = "grpc.alter";
+pub const METRIC_GRPC_DROP_TABLE: &str = "grpc.drop_table";
+pub const METRIC_GRPC_FLUSH_TABLE: &str = "grpc.flush_table";
+pub const METRIC_GRPC_COMPACT_TABLE: &str = "grpc.compact_table";
+pub const METRIC_GRPC_TRUNCATE_TABLE: &str = "grpc.truncate_table";
+pub const METRIC_GRPC_DO_GET: &str = "grpc.do_get";
--- a/src/client/src/stream_insert.rs
+++ b/src/client/src/stream_insert.rs
@@ -0,0 +1,105 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use api::v1::greptime_database_client::GreptimeDatabaseClient;
+use api::v1::greptime_request::Request;
+use api::v1::{
+    AuthHeader, GreptimeRequest, GreptimeResponse, InsertRequest, InsertRequests, RequestHeader,
+};
+use tokio::sync::mpsc;
+use tokio::task::JoinHandle;
+use tokio_stream::wrappers::ReceiverStream;
+use tonic::transport::Channel;
+use tonic::{Response, Status};
+
+use crate::error::{self, Result};
+use crate::from_grpc_response;
+
+/// A structure that provides some methods for streaming data insert.
+///
+/// [`StreamInserter`] cannot be constructed directly: `StreamInserter::new` is crate-private.
+/// Obtain one from a `Database` instead, as shown below.
+///
+/// ```ignore
+/// let grpc_client = Client::with_urls(vec!["127.0.0.1:4002"]);
+/// let client = Database::new_with_dbname("db_name", grpc_client);
+/// let stream_inserter = client.streaming_inserter().unwrap();
+/// ```
+///
+/// If you want to see a concrete usage example, please see
+/// [stream_ingest.rs](https://github.com/GreptimeTeam/greptimedb/blob/develop/src/client/examples/stream_ingest.rs).
+pub struct StreamInserter {
+    sender: mpsc::Sender<GreptimeRequest>,
+
+    auth_header: Option<AuthHeader>,
+
+    dbname: String,
+
+    join: JoinHandle<std::result::Result<Response<GreptimeResponse>, Status>>,
+}
+
+impl StreamInserter {
+    pub(crate) fn new(
+        mut client: GreptimeDatabaseClient<Channel>,
+        dbname: String,
+        auth_header: Option<AuthHeader>,
+        channel_size: usize,
+    ) -> StreamInserter {
+        let (send, recv) = tokio::sync::mpsc::channel(channel_size);
+
+        let join: JoinHandle<std::result::Result<Response<GreptimeResponse>, Status>> =
+            tokio::spawn(async move {
+                let recv_stream = ReceiverStream::new(recv);
+                client.handle_requests(recv_stream).await
+            });
+
+        StreamInserter {
+            sender: send,
+            auth_header,
+            dbname,
+            join,
+        }
+    }
+
+    pub async fn insert(&self, requests: Vec<InsertRequest>) -> Result<()> {
+        let inserts = InsertRequests { inserts: requests };
+        let request = self.to_rpc_request(Request::Inserts(inserts));
+
+        self.sender.send(request).await.map_err(|e| {
+            error::ClientStreamingSnafu {
+                err_msg: e.to_string(),
+            }
+            .build()
+        })
+    }
+
+    pub async fn finish(self) -> Result<u32> {
+        drop(self.sender);
+
+        let response = self.join.await.unwrap()?;
+        let response = response.into_inner();
+        from_grpc_response(response)
+    }
+
+    fn to_rpc_request(&self, request: Request) -> GreptimeRequest {
+        GreptimeRequest {
+            header: Some(RequestHeader {
+                authorization: self.auth_header.clone(),
+                dbname: self.dbname.clone(),
+                ..Default::default()
+            }),
+            request: Some(request),
+        }
+    }
+}
--- a/src/cmd/Cargo.toml
+++ b/src/cmd/Cargo.toml
@@ -10,45 +10,59 @@ name = "greptime"
 path = "src/bin/greptime.rs"

 [features]
-mem-prof = ["tikv-jemallocator", "tikv-jemalloc-ctl"]
+default = ["metrics-process"]
+tokio-console = ["common-telemetry/tokio-console"]
+metrics-process = ["servers/metrics-process"]
+greptimedb-telemetry = [
+    "datanode/greptimedb-telemetry",
+    "meta-srv/greptimedb-telemetry",
+]

 [dependencies]
 anymap = "1.0.0-beta.2"
+async-trait.workspace = true
 catalog = { path = "../catalog" }
+chrono.workspace = true
 clap = { version = "3.1", features = ["derive"] }
 client = { path = "../client" }
 common-base = { path = "../common/base" }
 common-error = { path = "../common/error" }
+common-meta = { path = "../common/meta" }
 common-query = { path = "../common/query" }
 common-recordbatch = { path = "../common/recordbatch" }
 common-telemetry = { path = "../common/telemetry", features = [
    "deadlock_detection",
 ] }
+config = "0.13"
 datanode = { path = "../datanode" }
+datatypes = { path = "../datatypes" }
 either = "1.8"
+etcd-client.workspace = true
 frontend = { path = "../frontend" }
 futures.workspace = true
 meta-client = { path = "../meta-client" }
 meta-srv = { path = "../meta-srv" }
+metrics.workspace = true
 nu-ansi-term = "0.46"
 partition = { path = "../partition" }
 query = { path = "../query" }
+rand.workspace = true
 rustyline = "10.1"
 serde.workspace = true
 servers = { path = "../servers" }
 session = { path = "../session" }
 snafu.workspace = true
 substrait = { path = "../common/substrait" }
-tikv-jemalloc-ctl = { version = "0.5", optional = true }
-tikv-jemallocator = { version = "0.5", optional = true }
+table = { path = "../table" }
+tikv-jemallocator = "0.5"
 tokio.workspace = true
-toml = "0.5"
-

 [dev-dependencies]
 common-test-util = { path = "../common/test-util" }
 rexpect = "0.5"
 serde.workspace = true
+temp-env = "0.3"
+toml.workspace = true

 [build-dependencies]
-build-data = "0.1.3"
+common-version = { path = "../common/version" }
--- a/src/cmd/build.rs
+++ b/src/cmd/build.rs
@@ -12,18 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-const DEFAULT_VALUE: &str = "unknown";
 fn main() {
-    println!(
-        "cargo:rustc-env=GIT_COMMIT={}",
-        build_data::get_git_commit().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
-    );
-    println!(
-        "cargo:rustc-env=GIT_BRANCH={}",
-        build_data::get_git_branch().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
-    );
-    println!(
-        "cargo:rustc-env=GIT_DIRTY={}",
-        build_data::get_git_dirty().map_or(DEFAULT_VALUE.to_string(), |v| v.to_string())
-    );
+    common_version::setup_git_versions();
 }
--- a/src/cmd/src/bin/greptime.rs
+++ b/src/cmd/src/bin/greptime.rs
@@ -12,27 +12,77 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#![doc = include_str!("../../../../README.md")]
+
 use std::fmt;

 use clap::Parser;
 use cmd::error::Result;
+use cmd::options::{Options, TopLevelOptions};
 use cmd::{cli, datanode, frontend, metasrv, standalone};
-use common_telemetry::logging::{error, info};
+use common_telemetry::logging::{error, info, TracingOptions};
+use metrics::gauge;

 #[derive(Parser)]
 #[clap(name = "greptimedb", version = print_version())]
 struct Command {
-    #[clap(long, default_value = "/tmp/greptimedb/logs")]
-    log_dir: String,
-    #[clap(long, default_value = "info")]
-    log_level: String,
+    #[clap(long)]
+    log_dir: Option<String>,
+    #[clap(long)]
+    log_level: Option<String>,
    #[clap(subcommand)]
    subcmd: SubCommand,
+
+    #[cfg(feature = "tokio-console")]
+    #[clap(long)]
+    tokio_console_addr: Option<String>,
+}
+
+pub enum Application {
+    Datanode(datanode::Instance),
+    Frontend(frontend::Instance),
+    Metasrv(metasrv::Instance),
+    Standalone(standalone::Instance),
+    Cli(cli::Instance),
+}
+
+impl Application {
+    async fn start(&mut self) -> Result<()> {
+        match self {
+            Application::Datanode(instance) => instance.start().await,
+            Application::Frontend(instance) => instance.start().await,
+            Application::Metasrv(instance) => instance.start().await,
+            Application::Standalone(instance) => instance.start().await,
+            Application::Cli(instance) => instance.start().await,
+        }
+    }
+
+    async fn stop(&self) -> Result<()> {
+        match self {
+            Application::Datanode(instance) => instance.stop().await,
+            Application::Frontend(instance) => instance.stop().await,
+            Application::Metasrv(instance) => instance.stop().await,
+            Application::Standalone(instance) => instance.stop().await,
+            Application::Cli(instance) => instance.stop().await,
+        }
+    }
 }

 impl Command {
-    async fn run(self) -> Result<()> {
-        self.subcmd.run().await
+    async fn build(self, opts: Options) -> Result<Application> {
+        self.subcmd.build(opts).await
+    }
+
+    fn load_options(&self) -> Result<Options> {
+        let top_level_opts = self.top_level_options();
+        self.subcmd.load_options(top_level_opts)
+    }
+
+    fn top_level_options(&self) -> TopLevelOptions {
+        TopLevelOptions {
+            log_dir: self.log_dir.clone(),
+            log_level: self.log_level.clone(),
+        }
    }
 }

@@ -51,13 +101,40 @@ enum SubCommand {
 }

 impl SubCommand {
-    async fn run(self) -> Result<()> {
+    async fn build(self, opts: Options) -> Result<Application> {
+        match (self, opts) {
+            (SubCommand::Datanode(cmd), Options::Datanode(dn_opts)) => {
+                let app = cmd.build(*dn_opts).await?;
+                Ok(Application::Datanode(app))
+            }
+            (SubCommand::Frontend(cmd), Options::Frontend(fe_opts)) => {
+                let app = cmd.build(*fe_opts).await?;
+                Ok(Application::Frontend(app))
+            }
+            (SubCommand::Metasrv(cmd), Options::Metasrv(meta_opts)) => {
+                let app = cmd.build(*meta_opts).await?;
+                Ok(Application::Metasrv(app))
+            }
+            (SubCommand::Standalone(cmd), Options::Standalone(opts)) => {
+                let app = cmd.build(opts.fe_opts, opts.dn_opts).await?;
+                Ok(Application::Standalone(app))
+            }
+            (SubCommand::Cli(cmd), Options::Cli(_)) => {
+                let app = cmd.build().await?;
+                Ok(Application::Cli(app))
+            }
+
+            _ => unreachable!(),
+        }
+    }
+
+    fn load_options(&self, top_level_opts: TopLevelOptions) -> Result<Options> {
        match self {
-            SubCommand::Datanode(cmd) => cmd.run().await,
-            SubCommand::Frontend(cmd) => cmd.run().await,
-            SubCommand::Metasrv(cmd) => cmd.run().await,
-            SubCommand::Standalone(cmd) => cmd.run().await,
-            SubCommand::Cli(cmd) => cmd.run().await,
+            SubCommand::Datanode(cmd) => cmd.load_options(top_level_opts),
+            SubCommand::Frontend(cmd) => cmd.load_options(top_level_opts),
+            SubCommand::Metasrv(cmd) => cmd.load_options(top_level_opts),
+            SubCommand::Standalone(cmd) => cmd.load_options(top_level_opts),
+            SubCommand::Cli(cmd) => cmd.load_options(top_level_opts),
        }
    }
 }
@@ -87,30 +164,71 @@ fn print_version() -> &'static str {
    )
 }

-#[cfg(feature = "mem-prof")]
+fn short_version() -> &'static str {
+    env!("CARGO_PKG_VERSION")
+}
+
+// {app_name}-{branch_name}-{commit_short}
+// The branch name (tag) of a release build should already contain the short
+// version, so the full version does not concatenate the short version explicitly.
+fn full_version() -> &'static str {
+    concat!(
+        "greptimedb-",
+        env!("GIT_BRANCH"),
+        "-",
+        env!("GIT_COMMIT_SHORT")
+    )
+}
+
+fn log_env_flags() {
+    info!("command line arguments");
+    for argument in std::env::args() {
+        info!("argument: {}", argument);
+    }
+}
+
 #[global_allocator]
 static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

 #[tokio::main]
 async fn main() -> Result<()> {
    let cmd = Command::parse();
-    // TODO(dennis):
-    // 1. adds ip/port to app
    let app_name = &cmd.subcmd.to_string();
-    let log_dir = &cmd.log_dir;
-    let log_level = &cmd.log_level;
+
+    let opts = cmd.load_options()?;
+    let logging_opts = opts.logging_options();
+    let tracing_opts = TracingOptions {
+        #[cfg(feature = "tokio-console")]
+        tokio_console_addr: cmd.tokio_console_addr.clone(),
+    };

    common_telemetry::set_panic_hook();
    common_telemetry::init_default_metrics_recorder();
-    let _guard = common_telemetry::init_global_logging(app_name, log_dir, log_level, false);
+    let _guard = common_telemetry::init_global_logging(app_name, logging_opts, tracing_opts);
+
+    // Report app version as gauge.
+    gauge!("app_version", 1.0, "short_version" => short_version(), "version" => full_version());
+
+    // Log version and argument flags.
+    info!(
+        "short_version: {}, full_version: {}",
+        short_version(),
+        full_version()
+    );
+    log_env_flags();
+
+    let mut app = cmd.build(opts).await?;

    tokio::select! {
-        result = cmd.run() => {
+        result = app.start() => {
            if let Err(err) = result {
                error!(err; "Fatal error occurs!");
            }
        }
        _ = tokio::signal::ctrl_c() => {
+            if let Err(err) = app.stop().await {
+                error!(err; "Fatal error occurs!");
+            }
            info!("Goodbye!");
        }
    }
--- a/src/cmd/src/cli.rs
+++ b/src/cmd/src/cli.rs
@@ -12,14 +12,44 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+mod bench;
 mod cmd;
 mod helper;
 mod repl;
+mod upgrade;

+use async_trait::async_trait;
+use bench::BenchTableMetadataCommand;
 use clap::Parser;
-use repl::Repl;
+use common_telemetry::logging::LoggingOptions;
+pub use repl::Repl;
+use upgrade::UpgradeCommand;

 use crate::error::Result;
+use crate::options::{Options, TopLevelOptions};
+
+#[async_trait]
+pub trait Tool {
+    async fn do_work(&self) -> Result<()>;
+}
+
+pub enum Instance {
+    Repl(Repl),
+    Tool(Box<dyn Tool>),
+}
+
+impl Instance {
+    pub async fn start(&mut self) -> Result<()> {
+        match self {
+            Instance::Repl(repl) => repl.run().await,
+            Instance::Tool(tool) => tool.do_work().await,
+        }
+    }
+
+    pub async fn stop(&self) -> Result<()> {
+        Ok(())
+    }
+}

 #[derive(Parser)]
 pub struct Command {
@@ -28,20 +58,35 @@ pub struct Command {
 }

 impl Command {
-    pub async fn run(self) -> Result<()> {
-        self.cmd.run().await
+    pub async fn build(self) -> Result<Instance> {
+        self.cmd.build().await
+    }
+
+    pub fn load_options(&self, top_level_opts: TopLevelOptions) -> Result<Options> {
+        let mut logging_opts = LoggingOptions::default();
+        if let Some(dir) = top_level_opts.log_dir {
+            logging_opts.dir = dir;
+        }
+        if top_level_opts.log_level.is_some() {
+            logging_opts.level = top_level_opts.log_level;
+        }
+        Ok(Options::Cli(Box::new(logging_opts)))
    }
 }

 #[derive(Parser)]
 enum SubCommand {
    Attach(AttachCommand),
+    Upgrade(UpgradeCommand),
+    Bench(BenchTableMetadataCommand),
 }

 impl SubCommand {
-    async fn run(self) -> Result<()> {
+    async fn build(self) -> Result<Instance> {
        match self {
-            SubCommand::Attach(cmd) => cmd.run().await,
+            SubCommand::Attach(cmd) => cmd.build().await,
+            SubCommand::Upgrade(cmd) => cmd.build().await,
+            SubCommand::Bench(cmd) => cmd.build().await,
        }
    }
 }
@@ -57,8 +102,51 @@ pub(crate) struct AttachCommand {
 }

 impl AttachCommand {
-    async fn run(self) -> Result<()> {
-        let mut repl = Repl::try_new(&self).await?;
-        repl.run().await
+    async fn build(self) -> Result<Instance> {
+        let repl = Repl::try_new(&self).await?;
+        Ok(Instance::Repl(repl))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_load_options() {
+        let cmd = Command {
+            cmd: SubCommand::Attach(AttachCommand {
+                grpc_addr: String::from(""),
+                meta_addr: None,
+                disable_helper: false,
+            }),
+        };
+
+        let opts = cmd.load_options(TopLevelOptions::default()).unwrap();
+        let logging_opts = opts.logging_options();
+        assert_eq!("/tmp/greptimedb/logs", logging_opts.dir);
+        assert!(logging_opts.level.is_none());
+        assert!(!logging_opts.enable_jaeger_tracing);
+    }
+
+    #[test]
+    fn test_top_level_options() {
+        let cmd = Command {
+            cmd: SubCommand::Attach(AttachCommand {
+                grpc_addr: String::from(""),
+                meta_addr: None,
+                disable_helper: false,
+            }),
+        };
+
+        let opts = cmd
+            .load_options(TopLevelOptions {
+                log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
+                log_level: Some("debug".to_string()),
+            })
+            .unwrap();
+        let logging_opts = opts.logging_options();
+        assert_eq!("/tmp/greptimedb/test/logs", logging_opts.dir);
+        assert_eq!("debug", logging_opts.level.as_ref().unwrap());
    }
 }
--- a/src/cmd/src/cli/bench.rs
+++ b/src/cmd/src/cli/bench.rs
@@ -0,0 +1,199 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod datanode_table;
+mod table_info;
+mod table_name;
+mod table_region;
+
+use std::future::Future;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
+use async_trait::async_trait;
+use clap::Parser;
+use common_meta::key::table_region::RegionDistribution;
+use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
+use common_meta::table_name::TableName;
+use common_telemetry::info;
+use datatypes::data_type::ConcreteDataType;
+use datatypes::schema::{ColumnSchema, RawSchema};
+use meta_srv::service::store::etcd::EtcdStore;
+use meta_srv::service::store::kv::KvBackendAdapter;
+use rand::prelude::SliceRandom;
+use table::metadata::{RawTableInfo, RawTableMeta, TableId, TableIdent, TableType};
+
+use crate::cli::bench::datanode_table::DatanodeTableBencher;
+use crate::cli::bench::table_info::TableInfoBencher;
+use crate::cli::bench::table_name::TableNameBencher;
+use crate::cli::bench::table_region::TableRegionBencher;
+use crate::cli::{Instance, Tool};
+use crate::error::Result;
+
+async fn bench<F, Fut>(desc: &str, f: F, count: u32)
+where
+    F: Fn(u32) -> Fut,
+    Fut: Future<Output = ()>,
+{
+    let mut total = Duration::default();
+
+    for i in 1..=count {
+        let start = Instant::now();
+
+        f(i).await;
+
+        total += start.elapsed();
+    }
+
+    let cost = total.as_millis() as f64 / count as f64;
+    info!("{desc}, average operation cost: {cost:.2} ms");
+}
+
+async fn bench_self_recorded<F, Fut>(desc: &str, f: F, count: u32)
+where
+    F: Fn(u32) -> Fut,
+    Fut: Future<Output = Duration>,
+{
+    let mut total = Duration::default();
+
+    for i in 1..=count {
+        total += f(i).await;
+    }
+
+    let cost = total.as_millis() as f64 / count as f64;
+    info!("{desc}, average operation cost: {cost:.2} ms");
+}
+
+#[derive(Debug, Default, Parser)]
+pub struct BenchTableMetadataCommand {
+    #[clap(long)]
+    etcd_addr: String,
+    #[clap(long)]
+    count: u32,
+}
+
+impl BenchTableMetadataCommand {
+    pub async fn build(&self) -> Result<Instance> {
+        let etcd_store = EtcdStore::with_endpoints([&self.etcd_addr]).await.unwrap();
+
+        let table_metadata_manager = Arc::new(TableMetadataManager::new(KvBackendAdapter::wrap(
+            etcd_store,
+        )));
+
+        let tool = BenchTableMetadata {
+            table_metadata_manager,
+            count: self.count,
+        };
+        Ok(Instance::Tool(Box::new(tool)))
+    }
+}
+
+struct BenchTableMetadata {
+    table_metadata_manager: TableMetadataManagerRef,
+    count: u32,
+}
+
+#[async_trait]
+impl Tool for BenchTableMetadata {
+    async fn do_work(&self) -> Result<()> {
+        info!("Start benching table name manager ...");
+        TableNameBencher::new(self.table_metadata_manager.table_name_manager(), self.count)
+            .start()
+            .await;
+
+        info!("Start benching table info manager ...");
+        TableInfoBencher::new(self.table_metadata_manager.table_info_manager(), self.count)
+            .start()
+            .await;
+
+        info!("Start benching table region manager ...");
+        TableRegionBencher::new(
+            self.table_metadata_manager.table_region_manager(),
+            self.count,
+        )
+        .start()
+        .await;
+
+        info!("Start benching datanode table manager ...");
+        DatanodeTableBencher::new(
+            self.table_metadata_manager.datanode_table_manager(),
+            self.count,
+        )
+        .start()
+        .await;
+        Ok(())
+    }
+}
+
+fn create_table_info(table_id: TableId, table_name: TableName) -> RawTableInfo {
+    let columns = 100;
+    let mut column_schemas = Vec::with_capacity(columns);
+    column_schemas.push(
+        ColumnSchema::new(
+            "ts",
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            true,
+        )
+        .with_time_index(true),
+    );
+
+    for i in 1..columns {
+        let column_name = format!("my_column_{i}");
+        column_schemas.push(ColumnSchema::new(
+            column_name,
+            ConcreteDataType::string_datatype(),
+            true,
+        ));
+    }
+
+    let meta = RawTableMeta {
+        schema: RawSchema::new(column_schemas),
+        engine: "mito".to_string(),
+        created_on: chrono::DateTime::default(),
+        primary_key_indices: vec![],
+        next_column_id: columns as u32 + 1,
+        engine_options: Default::default(),
+        value_indices: vec![],
+        options: Default::default(),
+        region_numbers: (1..=100).collect(),
+        partition_key_indices: vec![],
+    };
+
+    RawTableInfo {
+        ident: TableIdent {
+            table_id,
+            version: 1,
+        },
+        name: table_name.table_name,
+        desc: Some("blah".to_string()),
+        catalog_name: table_name.catalog_name,
+        schema_name: table_name.schema_name,
+        meta,
+        table_type: TableType::Base,
+    }
+}
+
+fn create_region_distribution() -> RegionDistribution {
+    let mut regions = (1..=100).collect::<Vec<u32>>();
+    regions.shuffle(&mut rand::thread_rng());
+
+    let mut region_distribution = RegionDistribution::new();
+    for datanode_id in 0..10 {
+        region_distribution.insert(
+            datanode_id as u64,
+            regions[datanode_id * 10..(datanode_id + 1) * 10].to_vec(),
+        );
+    }
+    region_distribution
+}
--- a/src/cmd/src/cli/bench/datanode_table.rs
+++ b/src/cmd/src/cli/bench/datanode_table.rs
@@ -0,0 +1,131 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use common_meta::key::datanode_table::{DatanodeTableKey, DatanodeTableManager};
+
+use super::bench;
+
+pub struct DatanodeTableBencher<'a> {
+    datanode_table_manager: &'a DatanodeTableManager,
+    count: u32,
+}
+
+impl<'a> DatanodeTableBencher<'a> {
+    pub fn new(datanode_table_manager: &'a DatanodeTableManager, count: u32) -> Self {
+        Self {
+            datanode_table_manager,
+            count,
+        }
+    }
+
+    pub async fn start(&self) {
+        self.bench_create().await;
+        self.bench_get().await;
+        self.bench_move_region().await;
+        self.bench_tables().await;
+        self.bench_remove().await;
+    }
+
+    async fn bench_create(&self) {
+        let desc = format!(
+            "DatanodeTableBencher: create {} datanode table keys",
+            self.count
+        );
+        bench(
+            &desc,
+            |i| async move {
+                self.datanode_table_manager
+                    .create(1, i, vec![1, 2, 3, 4])
+                    .await
+                    .unwrap();
+            },
+            self.count,
+        )
+        .await;
+    }
+
+    async fn bench_get(&self) {
+        let desc = format!(
+            "DatanodeTableBencher: get {} datanode table keys",
+            self.count
+        );
+        bench(
+            &desc,
+            |i| async move {
+                let key = DatanodeTableKey::new(1, i);
+                assert!(self
+                    .datanode_table_manager
+                    .get(&key)
+                    .await
+                    .unwrap()
+                    .is_some());
+            },
+            self.count,
+        )
+        .await;
+    }
+
+    async fn bench_move_region(&self) {
+        let desc = format!(
+            "DatanodeTableBencher: move {} datanode table regions",
+            self.count
+        );
+        bench(
+            &desc,
+            |i| async move {
+                self.datanode_table_manager
+                    .move_region(1, 2, i, 1)
+                    .await
+                    .unwrap();
+            },
+            self.count,
+        )
+        .await;
+    }
+
+    async fn bench_tables(&self) {
+        let desc = format!(
+            "DatanodeTableBencher: list {} datanode table keys",
+            self.count
+        );
+        bench(
+            &desc,
+            |_| async move {
+                assert!(!self
+                    .datanode_table_manager
+                    .tables(1)
+                    .await
+                    .unwrap()
+                    .is_empty());
+            },
+            self.count,
+        )
+        .await;
+    }
+
+    async fn bench_remove(&self) {
+        let desc = format!(
+            "DatanodeTableBencher: remove {} datanode table keys",
+            self.count
+        );
+        bench(
+            &desc,
+            |i| async move {
+                self.datanode_table_manager.remove(1, i).await.unwrap();
+            },
+            self.count,
+        )
+        .await;
+    }
+}
--- a/Show More
+++ b/Show More