Compare commits


118 Commits

Author SHA1 Message Date
discord9
7f7b974e8a fix: conn pool leak & placeholder feature so ci can compile 2025-04-10 15:01:07 +08:00
discord9
4875ace0d0 fix: placeholder feature so ci can compile 2025-04-08 14:37:55 +08:00
discord9
a847d96649 fix: time window filter expr use OR 2025-04-07 16:50:17 +08:00
discord9
23a0a54e18 fix: convert timestamp unit too 2025-04-07 16:50:17 +08:00
discord9
78eb8b53f6 fix: quote&more info when time window too many
chore: even more warning

fix: filter first warn later
2025-04-07 16:50:17 +08:00
discord9
2455f39e8e fix: subquery&cte time window expr 2025-04-07 16:46:53 +08:00
discord9
7fe0074202 refactor: even finer&limit time window num 2025-04-07 16:46:53 +08:00
discord9
e16bc203d0 feat: basic time window aware 2025-04-07 16:46:53 +08:00
discord9
9a3c26bb0a metrics: better bucket&longer timeout 2025-04-07 16:46:53 +08:00
discord9
e1ff398c32 fix: timeout 2025-04-07 16:46:53 +08:00
discord9
780e3000de fix: heartbeat&expire_after unit 2025-04-07 16:46:53 +08:00
discord9
2b5ddf8427 feat: time window in df plan
WIP

test: found out time window expr

chore: pub

tests: also unparsed

tests: rm dup code

feat: frontend client for recording rule

fix: bound edgecase

WIP

WIP

feat: rule engine

feat: add init options & tmp reroute to rule

fix: dist client get

fix: also not handle mirror write in flownode

chore: clippy
2025-04-07 16:46:47 +08:00
Weny Xu
904d560175 feat(promql-planner): introduce vector matching binary operation (#5578)
* feat(promql-planner): support vector matching for binary operation

* test: add sqlness tests
2025-02-27 07:39:19 +00:00
Lei, HUANG
765d1277ee fix(metasrv): clean expired nodes in memory (#5592)
* fix/frontend-node-state: Refactor NodeInfoKey and Context Handling in Meta Server

 • Removed unused cluster_id from NodeInfoKey struct.
 • Updated HeartbeatHandlerGroup to return Context alongside HeartbeatResponse.
 • Added current_node_info to Context for tracking node information.
 • Implemented on_node_disconnect in Context to handle node disconnection events, specifically for Frontend roles.
 • Adjusted register_pusher function to return PusherId directly.
 • Updated tests to accommodate changes in Context structure.

* fix/frontend-node-state: Refactor Heartbeat Handler Context Management

Refactored the HeartbeatHandlerGroup::handle method to use a mutable reference for Context instead of passing it by value. This change simplifies the
context management by eliminating the need to return the context with the response. Updated the Metasrv implementation to align with this new context
handling approach, improving code clarity and reducing unnecessary context cloning.

* revert: clean cluster info on disconnect

* fix/frontend-node-state: Add Frontend Expiry Listener and Update NodeInfoKey Conversion

 • Introduced FrontendExpiryListener to manage the expiration of frontend nodes, including its integration with leadership change notifications.
 • Modified NodeInfoKey conversion to use references, enhancing efficiency and consistency across the codebase.
 • Updated collect_cluster_info_handler and metasrv to incorporate the new listener and conversion changes.
 • Added frontend_expiry module to the project structure for better organization and maintainability.

* chore: add config for node expiry

* add some doc

* fix: clippy

* fix/frontend-node-state:
 ### Refactor Node Expiry Handling
 - **Configuration Update**: Removed `node_expiry_tick` from `metasrv.example.toml` and `MetasrvOptions` in `metasrv.rs`.
 - **Module Renaming**: Renamed `frontend_expiry.rs` to `node_expiry_listener.rs` and updated references in `lib.rs`.
 - **Code Refactoring**: Replaced `FrontendExpiryListener` with `NodeExpiryListener` in `node_expiry_listener.rs` and `metasrv.rs`, removing the tick interval and adjusting logic to use a fixed 60-second interval for node expiry checks.

* fix/frontend-node-state:
 Improve logging in `node_expiry_listener.rs`

 - Enhanced warning message to include peer information when an unrecognized node info key is encountered in `node_expiry_listener.rs`.

* docs: update config docs

* fix/frontend-node-state:
 **Refactor Context Handling in Heartbeat Services**

 - Updated `HeartbeatHandlerGroup` in `handler.rs` to pass `Context` by value instead of by mutable reference, allowing for more flexible context
 management.
 - Modified `Metasrv` implementation in `heartbeat.rs` to clone `Context` when passing to `handle` method, ensuring thread safety and consistency in
 asynchronous operations.
2025-02-27 06:16:36 +00:00
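
A minimal sketch of the periodic expiry check described in this PR, assuming hypothetical node bookkeeping types; only the NodeExpiryListener name, the fixed 60-second interval, and the peer-aware warning come from the commit messages above.

use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::Mutex;

struct NodeInfo {
    peer_addr: String, // hypothetical field; real keys carry role and node id
    last_seen: Instant,
}

struct NodeExpiryListener {
    max_idle: Duration,
}

impl NodeExpiryListener {
    // In metasrv this loop would only run while the instance is the leader;
    // leadership-change notifications start and stop it.
    async fn run(&self, nodes: Arc<Mutex<Vec<NodeInfo>>>) {
        let mut ticker = tokio::time::interval(Duration::from_secs(60));
        loop {
            ticker.tick().await;
            let mut nodes = nodes.lock().await;
            nodes.retain(|node| {
                let expired = node.last_seen.elapsed() > self.max_idle;
                if expired {
                    // The follow-up commit enriches this warning with peer info.
                    eprintln!("expiring idle node: {}", node.peer_addr);
                }
                !expired
            });
        }
    }
}
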
discord9
ccf42a9d97 fix: flow heartbeat retry (#5600)
* fix: flow heartbeat retry

* fix?: not sure if fixed

* chore: per review
2025-02-27 03:58:21 +00:00
Weny Xu
71e2fb895f feat: introduce prom_round fn (#5604)
* feat: introduce `prom_round` fn

* test: add sqlness tests
2025-02-27 03:30:15 +00:00
Ruihang Xia
c9671fd669 feat(promql): implement subquery (#5606)
* feat: initial implement for promql subquery

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl and test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-27 03:28:04 +00:00
Ruihang Xia
b5efc75aab feat(promql): ignore invalid input in histogram plan (#5607)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-27 03:18:20 +00:00
Weny Xu
c1d18d9980 fix(prom): preserve the order of series in PromQueryResult (#5601)
fix(prom): keep the order of tags
2025-02-26 13:40:09 +00:00
Lei, HUANG
5d9faaaf39 fix(metasrv): reject ddl when metasrv is follower (#5599)
* fix/reject-ddl-in-follower-metasrv:
 Add leader check and logging for gRPC requests in `procedure.rs`

 - Implemented leader verification for `query_procedure_state`, `ddl`, and `procedure_details` gRPC requests in `procedure.rs`.
 - Added logging with `warn` for requests reaching a non-leader node.
 - Introduced `ResponseHeader` and `Error::is_not_leader()` to handle non-leader responses.

* fix/reject-ddl-in-follower-metasrv:
 Improve leader address handling in `heartbeat.rs`

 - Refactor leader address retrieval by renaming `leader` to `leader_addr` for clarity.
 - Update `make_client` function to use a reference to `leader_addr`.
 - Enhance logging to include the leader address in the success message for creating a heartbeat stream.

* fmt

* fix/reject-ddl-in-follower-metasrv:
 **Enhance Leader Check in `procedure.rs`**

 - Updated the leader verification logic in `procedure.rs` to return a failed `MigrateRegionResponse` when the server is not the leader.
 - Added logging to warn when a migrate request is received by a non-leader server.
2025-02-26 08:10:40 +00:00
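
A hedged sketch of the leader guard this PR adds to metasrv's gRPC handlers; the types and request shape are hypothetical, and only the check-then-warn-then-reject flow comes from the bullets above.

use std::sync::atomic::{AtomicBool, Ordering};

#[derive(Debug)]
enum MetaError {
    NotLeader, // surfaced to the client via a "not leader" response header
}

struct ProcedureService {
    is_leader: AtomicBool,
}

impl ProcedureService {
    fn handle_ddl(&self, request_id: u64) -> Result<(), MetaError> {
        if !self.is_leader.load(Ordering::Relaxed) {
            // Log first so misrouted requests are visible on the follower,
            // then reject instead of executing the DDL locally.
            eprintln!("ddl request {request_id} reached a non-leader metasrv, rejecting");
            return Err(MetaError::NotLeader);
        }
        // ... submit the DDL procedure on the leader ...
        Ok(())
    }
}
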
ZonaHe
538875abee feat: update dashboard to v0.7.11 (#5597)
Co-authored-by: sunchanglong <sunchanglong@users.noreply.github.com>
2025-02-26 07:57:59 +00:00
jeremyhi
5ed09c4584 fix: all heartbeat channel need to check leader (#5593) 2025-02-25 10:45:30 +00:00
Yingwen
3f6a41eac5 fix: update show create table output for fulltext index (#5591)
* fix: update full index syntax in show create table

* test: update fulltext sqlness result
2025-02-25 09:36:27 +00:00
yihong
ff0dcf12c5 perf: close issue 4974 by do not delete columns when drop logical region about 100 times faster (#5561)
* perf: do not delete columns when drop logical region in drop database

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: make ci happy

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address review comments

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address some comments

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: drop stupid comments by copilot

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* chore: minor refactor

* chore: minor refactor

* chore: update greptime-proto

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Co-authored-by: WenyXu <wenymedia@gmail.com>
2025-02-25 09:00:49 +00:00
Yingwen
5b1fca825a fix: remove cached and uploaded files on failure (#5590) 2025-02-25 08:51:37 +00:00
Ruihang Xia
7bd108e2be feat: impl hll_state, hll_merge and hll_calc for incremental distinct counting (#5579)
* basic impl

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* more tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* sqlness test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update with more test and logs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl merge fn

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* rename function names

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-24 19:07:37 +00:00
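
To make the state/merge/calc split concrete, here is a simplified, self-contained HyperLogLog sketch; it is not GreptimeDB's implementation, only an illustration of why incremental distinct counting works: partial states from different time windows or nodes merge with an element-wise max, and only the final state is turned into an estimate.

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

const B: u32 = 10;       // 2^10 = 1024 registers
const M: usize = 1 << B;

#[derive(Clone)]
struct HllState {
    registers: Vec<u8>,
}

impl HllState {
    fn new() -> Self {
        Self { registers: vec![0; M] }
    }

    // hll_state: fold one value into the sketch.
    fn insert<T: Hash>(&mut self, value: &T) {
        let mut hasher = DefaultHasher::new();
        value.hash(&mut hasher);
        let hash = hasher.finish();
        let idx = (hash >> (64 - B)) as usize;          // first B bits pick a register
        let rank = (hash << B).leading_zeros() as u8 + 1;
        self.registers[idx] = self.registers[idx].max(rank);
    }

    // hll_merge: combine two partial states (element-wise max).
    fn merge(&mut self, other: &HllState) {
        for (a, b) in self.registers.iter_mut().zip(&other.registers) {
            *a = (*a).max(*b);
        }
    }

    // hll_calc: estimate the distinct count (no small-range correction).
    fn calc(&self) -> f64 {
        let alpha = 0.7213 / (1.0 + 1.079 / M as f64);
        let sum: f64 = self.registers.iter().map(|&r| 2f64.powi(-(r as i32))).sum();
        alpha * (M as f64) * (M as f64) / sum
    }
}

fn main() {
    let (mut day1, mut day2) = (HllState::new(), HllState::new());
    (0u64..50_000).for_each(|i| day1.insert(&i));
    (25_000u64..75_000).for_each(|i| day2.insert(&i));
    day1.merge(&day2);
    println!("approx distinct values: {:.0}", day1.calc()); // roughly 75,000
}
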
Weny Xu
286f225e50 fix: correct inverted_indexed_column_ids behavior (#5586)
* fix: correct `inverted_indexed_column_ids`

* fix: fix unit tests
2025-02-23 07:17:38 +00:00
Ruihang Xia
4f988b5ba9 feat: remove default inverted index for physical table (#5583)
* feat: remove default inverted index for physical table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-22 06:48:05 +00:00
Ruihang Xia
500d0852eb fix: avoid run labeler job concurrently (#5584)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-22 05:18:26 +00:00
Zhenchi
8d05fb3503 feat: unify puffin name passed to stager (#5564)
* feat: purge a given puffin file in staging area

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* polish log

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* ttl set to 2d

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat: expose staging_ttl to index config

* feat: unify puffin name passed to stager

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix test

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fallback to remote index

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: evenyag <realevenyag@gmail.com>
2025-02-21 09:27:03 +00:00
Ruihang Xia
d7b6718be0 feat: run sqlness in parallel (#5499)
* define server mode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* bump sqlness

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* all good

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor: Move config generation logic from Env to ServerMode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* finalize

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change license header

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* rename variables

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* override parallelism

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* rename more variables

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-21 07:05:19 +00:00
Ruihang Xia
6f0783e17e fix: broken link in AUTHOR.md (#5581) 2025-02-21 07:01:41 +00:00
Ruihang Xia
d69e93b91a feat: support to generate json output for explain analyze in http api (#5567)
* impl

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* integration test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/servers/src/http/hints.rs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* refactor: with FORMAT option for explain format

* lift some well-known metrics

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Ning Sun <sunning@greptime.com>
2025-02-21 05:13:09 +00:00
Ruihang Xia
76083892cd feat: support UNNEST (#5580)
* feat: support UNNEST

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy and sqlness

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-21 04:53:56 +00:00
Ruihang Xia
7981c06989 feat: implement uddsketch function to calculate percentile (#5574)
* basic impl

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* more tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* sqlness test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update with more test and logs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-20 18:59:20 +00:00
beryl678
97bb1519f8 docs: revise the author list (#5575) 2025-02-20 18:04:23 +00:00
Weny Xu
1d8c9c1843 feat: enable gzip for prometheus query handlers and ignore NaN values in prometheus response (#5576)
* feat: enable gzip for prometheus query handlers and ignore nan values in prometheus response

* Apply suggestions from code review

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>

---------

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>
2025-02-20 11:34:32 +00:00
jeremyhi
71007e200c feat: remap flow route address (#5565)
* feat: remap flow peers

* refactor: not stream

* feat: remap flownode addr on FlowRoute and TableFlow

* fix: unit test

* Update src/meta-srv/src/handler/remap_flow_peer_handler.rs

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

* chore: by comment

* Update src/meta-srv/src/handler/remap_flow_peer_handler.rs

* Update src/common/meta/src/key/flow/table_flow.rs

* Update src/common/meta/src/key/flow/flow_route.rs

* chore: remove duplicate field

---------

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2025-02-20 08:21:32 +00:00
jeremyhi
a0ff9e751e feat: flow type on creating procedure (#5572)
feat: flow type on creating
2025-02-20 08:12:02 +00:00
LFC
f6f617d667 feat: submit node's cpu cores number to metasrv in heartbeat (#5571)
* feat: submit node's cpu cores number to metasrv in heartbeat

* update greptime-proto dep
2025-02-20 03:55:18 +00:00
Ruihang Xia
e8788088a8 feat(log-query): implement the first part of log query expr (#5548)
* feat(log-query): implement the first part of log query expr

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-19 18:25:41 +00:00
shuiyisong
53b25c04a2 chore: support Loki's structured metadata for ingestion (#5541)
* chore: support loki's structured metadata

* test: update test

* chore: revert some code change

* chore: address CR comment
2025-02-19 16:44:26 +00:00
dennis zhuang
62a8b8b9dc feat(promql): supports sort, sort_desc etc. functions (#5542)
* feat(promql): supports sort, sort_desc etc. functions

* chore: fix toml format and tests

* chore: update deps

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* chore: remove fixme

* fix: cargo lock

* chore: style

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
2025-02-19 13:13:49 +00:00
Weny Xu
c8bdeaaa6a fix(promql-planner): update ctx field columns of OR operator (#5556)
* fix(promql-planner): update ctx field columns of OR operator

* test: add sqlness test
2025-02-19 11:18:58 +00:00
Ning Sun
81da18e5df refactor: use global type alias for pipeline input (#5568)
* refactor: use global type alias for pipeline input

* fmt: reformat
2025-02-19 10:41:33 +00:00
Weny Xu
7c65fddb30 fix(promql-planner): correct AND/UNLESS operator behavior (#5557)
* fix(promql-planner): keep field column in left input for AND operator

* test: add sqlness test

* fix: fix unless operator
2025-02-19 09:07:39 +00:00
Zhenchi
421e38c481 feat: allow purging a given puffin file in staging area (#5558)
* feat: purge a given puffin file in staging area

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* polish log

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* ttl set to 2d

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat: expose staging_ttl to index config

* fix test

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* use `invalidate_entries_if` instead of maintaining map

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* run_pending_tasks after purging

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: evenyag <realevenyag@gmail.com>
2025-02-19 08:58:30 +00:00
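
A hedged sketch of the purge approach named in these commits, using moka's sync cache as the stager: invalidate staged entries by predicate instead of maintaining a separate map, then run pending tasks so the eviction takes effect promptly. The key format, sizes, and 2-day TTL are illustrative; enabling invalidation closures on the builder is required by moka's invalidate_entries_if.

use std::time::Duration;
use moka::sync::Cache;

fn main() {
    // Staging cache: staged blob name -> size in bytes.
    let stager: Cache<String, u64> = Cache::builder()
        .time_to_live(Duration::from_secs(2 * 24 * 60 * 60)) // staging TTL of 2 days
        .support_invalidation_closures() // needed for invalidate_entries_if below
        .build();

    stager.insert("region-1/index.puffin/blob-a".to_string(), 4096);
    stager.insert("region-2/index.puffin/blob-b".to_string(), 8192);

    // Purge everything staged for a given puffin file.
    let target = "region-1/index.puffin/".to_string();
    stager
        .invalidate_entries_if(move |key, _size| key.starts_with(&target))
        .expect("invalidation closures were enabled on the builder");

    // Evictions are processed lazily; running pending tasks applies the
    // purge (and any TTL expirations) right away.
    stager.run_pending_tasks();
    println!("{:?}", stager.get("region-1/index.puffin/blob-a")); // expected: None
}
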
Weny Xu
aada5c1706 fix(promql-planner): remove le tag in ctx (#5560)
* fix(promql-planner): remove le tag in ctx

* test: add sqlness test

* chore: apply suggestions from CR
2025-02-19 03:51:27 +00:00
yihong
aa8f119bbb chore: format all toml files (#5529)
fix: format some cargo files

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-02-18 12:09:01 +00:00
ZonaHe
19a6d15849 feat: update dashboard to v0.7.10 (#5562)
Co-authored-by: ZonaHex <ZonaHex@users.noreply.github.com>
2025-02-18 12:06:22 +00:00
liyang
073aaefe65 chore: improve grafana dashboard (#5559) 2025-02-18 11:36:27 +00:00
Yingwen
77223a0f3e fix: window sort support alias time index (#5543)
* fix: use alias expr to check commutativity

* chore: debug sort

* feat: consider alias in window sort optimizer

* test: sqlness test

* test: update sqlness result
2025-02-18 10:35:43 +00:00
Ruihang Xia
4ef038d098 fix: correct promql behavior on nonexistent columns (#5547)
* Revert "fix(promql): ignore filters for non-existent labels (#5519)"

This reverts commit 33a2485f54.

* reimplement

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* state safety

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-17 18:43:50 +00:00
jeremyhi
deb9520970 fix: information_schema.cluster_info be covered by the same id (#5555)
* fix: information_schema.cluster_info be covered by the same id

* chore: by comment
2025-02-17 11:51:02 +00:00
Yingwen
6bba5e0afa feat: collect stager metrics (#5553)
* feat: collect stager metrics

* Apply suggestions from code review

Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>

* Update src/mito2/src/metrics.rs

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>
2025-02-17 07:09:15 +00:00
Ruihang Xia
f359eeb667 feat(log-query): support specifying exclusive/inclusive for between filter (#5546)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-17 04:40:47 +00:00
liyang
009dbad581 ci: don't push nightly latest image (#5551)
* ci: don't push nightly latest image

* add push release latest image
2025-02-17 04:34:49 +00:00
liyang
a2047b096c ci: use s5cmd upload artifacts (#5550) 2025-02-17 02:57:13 +00:00
Ruihang Xia
6e8b1ba004 feat: drop noneffective regex filter (#5544)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-15 04:20:26 +00:00
Ruihang Xia
7fc935c61c feat!: support alter skipping index (#5538)
* feat: support alter skipping index

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update test results

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* cargo fmt

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* finalize

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-14 18:43:21 +00:00
discord9
1e6d2fb1fa feat: add snapshot seqs field to query context (#5477)
* TODO: snapshot read

* feat: RegionEngine get last seq

* feat: query context snapshot

* chore: use new proto

* feat: get_region_seqs in region engine

* chore: typo

* chore: toml

* feat: make snapshots modifiable

* feat: add hint for snapshot read

* chore: some typo

* refactor: remove hint as not used

* fix: use committed seqs

* refactor: remove sequences variant on RegionRequest

* refactor: per review

* chore: rebase solve conflict

* refactor: rm unused key

* chore: per review

* chore: per review
2025-02-14 09:07:48 +00:00
Ruihang Xia
0d19e8f089 fix: promql join operation won't consider time index (#5535)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2025-02-14 08:21:05 +00:00
Weny Xu
c56106b883 perf: optimize table alteration speed in metric engine (#5526)
* feat(metric-engine): introduce batch alter request handling

* refactor: minor refactor

* refactor: push down filter to mito

* chore: apply suggestions from CR
2025-02-14 08:11:48 +00:00
Yohan Wal
edb040dea3 refactor: refactor pg kvbackend impl in preparation for other rds kvbackend (#5494)
* refactor: unify rds kvbackend impl

* fix: licence header

* refactor: use unique sql template set

* fix: fix deps

* chore: apply optimization patch

* chore: apply optimization patch(2)

* chore: follow review comments
2025-02-14 08:10:09 +00:00
Ruihang Xia
7bbc87b3c0 feat(promql): add series count metrics (#5534)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2025-02-14 07:49:28 +00:00
Zhenchi
858dae7b23 feat: add stager notifier to collect metrics (#5530)
* feat: add stager notifier to collect metrics

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* apply prev commit

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* remove dup size

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* add load cost

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-02-14 07:49:26 +00:00
Weny Xu
33a2485f54 fix(promql): ignore filters for non-existent labels (#5519)
* fix(promql): ignore filters for non-existent labels

* chore: add comments

* test: add sqlness test
2025-02-14 06:40:15 +00:00
zyy17
8ebf454bc1 fix(jaeger): return error when no tracing table (#5539)
fix: return error when no tracing table
2025-02-14 06:20:56 +00:00
Ning Sun
f5b9ade6df chore: add section marker for external dependencies (#5536)
* chore: add section marker for external dependencies

* chore: update cargo.lock

* Update Cargo.toml

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>

* chore: update meter-core

---------

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>
2025-02-14 06:16:57 +00:00
Ruihang Xia
9c1834accd fix: old typo (#5532)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-14 02:18:43 +00:00
Yingwen
918517d221 feat: window sort supports where on fields and time index (#5527)
* feat: handle filter for window sort

* test: sqlness filter test for window sort

* test: add test on tag column filter

* test: test for filter on ts

* test: update sqlness test
2025-02-14 01:38:15 +00:00
liyang
92d9e81a9f ci: use the repository variable to pass to image-name (#5517)
Co-authored-by: Yingwen <realevenyag@gmail.com>
2025-02-13 18:14:49 +00:00
yihong
224b1d15cd chore: use the same version of chrono-tz (#5523)
* fix: use the same version of chrono-tz

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address comments

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-02-13 17:23:29 +00:00
Yingwen
b4d5393080 feat: speed up read/write cache and stager eviction (#5531)
* feat: change cache policy for file cache

* feat: file cache run pending task after put

* feat: run pending task in put_dir

* feat: run pending task after stager recovered

* feat: purge recycle bin periodically

* feat: use lru policy for read cache
2025-02-13 17:13:24 +00:00
Weny Xu
73c29bb482 fix(promql): unescape matcher values (#5521)
* fix(promql): unescape matcher values

* test: add sqlness tests

* chore: apply suggestions from CR

* feat: use unescaper
2025-02-13 09:42:25 +00:00
Ning Sun
198ee87675 feat: alias database matcher for promql (#5522)
* feat: provide an alias db matcher for promql

* refactor: rename __db__ to __database__

* chore: fix sqlness test
2025-02-13 08:37:37 +00:00
jeremyhi
02af9dd21a refactor!: remove datetime type (#5506)
* feat remove datetime type

* chore: fix unit test

* chore: add column test

* refactor: move create and alter validation to one place

* chore: minor refactor ut

* refactor: rename expr_factory to expr_helper

* chore: remove unnecessary args
2025-02-13 08:01:16 +00:00
Weny Xu
bb97f1bf16 perf: optimize table creation speed in metric engine (#5503)
* feat(metric-engine): introduce batch create request handling

* chore: remove unused code

* test: add more tests

* chore: remove unused error

* chore: apply suggestions from CR
2025-02-13 07:39:04 +00:00
yihong
fbd5316fdb perf: better performance for LastNonNullIter close #5229 about 10x times faster (#5518)
* fix: better performance for LastNonNullIter, close #5229

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address comments

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: add Safety comments for the unwrap

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-02-13 05:14:39 +00:00
Weny Xu
63d5a69a31 fix(query_range): skip data field on errors (#5520)
* fix: skip serializing PrometheusResponse when None

* fix: fix unit test

* chore: clippy
2025-02-13 04:32:24 +00:00
zyy17
954310f917 feat: implement Jaeger query APIs (#5452)
* feat: implement jaeger query api

* test: add some unit tests

* test: add integration tests for jaeger query APIs

* refactor: parse tags from url parameters

* refactor: support to query traces by tags

* refactor: add limit parameter

* refactor: add jaeger query api metrics

* chore: add some comment docs and default limit value

* test: add more unit tests

* docs: add jaeger options in config docs

* refactor: code review

* wip

* refactor: use datafusion's dataframe APIs to query traces

* refactor: code review

* chore: format test cases

* refactor: add check_schema()

* chore: fix clippy errors and rename function name

* refactor: throw error when converting start_time and duration fails

* chore: modify incorrect request type name

* chore: remove unnecessary serde rename

* refactor: add some important comments

* refactor: add SPAN_KIND_PREFIX

* refactor: code review
2025-02-12 23:36:38 +00:00
zyy17
58c6274bf6 fix: use fixed tonistiigi/binfmt:qemu-v7.0.0-28 image version instead of latest version to avoid segmentation fault (#5516)
fix: use fixed tonistiigi/binfmt:qemu-v7.0.0-28 image version instead of latest version to avoid segmentation fault

Co-authored-by: Yingwen <realevenyag@gmail.com>
2025-02-12 19:29:49 +00:00
Ning Sun
46947fd1de ci: docbot requires pull_request_target (#5514) 2025-02-12 09:46:04 +00:00
Weny Xu
44fffdec8b refactor: refactor region server request handling (#5504)
* refactor: refactor region server requests handling

* chore: apply suggestions from CR
2025-02-12 08:34:42 +00:00
Ruihang Xia
8026b1d72c feat!: unify all index creation grammars (#5486)
* column options

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* handle table constrain

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update test assertions

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change inverted index table constrain usage

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* don't create inverted index for pk on alter table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove remaining pk-as-inverted-index

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* more inverted index magic

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness result again

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/sql/src/statements.rs

Co-authored-by: jeremyhi <jiachun_feng@proton.me>

* drop support for index def in table constrain

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: jeremyhi <jiachun_feng@proton.me>
2025-02-12 06:54:09 +00:00
Ruihang Xia
e22aa819be feat: support server-side keep-alive for mysql and pg protocols (#5496)
* feat: support server-side keep-alive for mysql and pg protocols

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update config.md

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update config to use humantime for keep-alive configuration

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: Update socket2 dependency

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-11 19:22:10 +00:00
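
A small sketch of server-side TCP keep-alive using the socket2 crate mentioned in the last commit; the durations are placeholders for the humantime values in the server options, and with_interval is not available on every platform.

use std::net::TcpStream;
use std::time::Duration;
use socket2::{SockRef, TcpKeepalive};

// Apply keep-alive to an accepted MySQL/PG connection so dead clients
// are detected even when the connection is otherwise idle.
fn enable_keep_alive(stream: &TcpStream) -> std::io::Result<()> {
    let keepalive = TcpKeepalive::new()
        .with_time(Duration::from_secs(300))     // idle time before the first probe
        .with_interval(Duration::from_secs(30)); // gap between probes (platform-dependent)
    SockRef::from(stream).set_tcp_keepalive(&keepalive)
}
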
localhost
beb9c0a797 chore: set now as timestamp field default value (#5502)
* chore: set now as timestamp field default value

* chore: import pipeline default value
2025-02-11 17:41:44 +00:00
ZonaHe
5f6f5e980a feat: update dashboard to v0.7.10-rc (#5512)
Co-authored-by: ZonaHex <ZonaHex@users.noreply.github.com>
2025-02-11 11:00:10 +00:00
LFC
ccfa40dc41 ci: run nightly jobs only on greptimedb repo (#5505)
ci: skip nightly ci jobs (#9)

(cherry picked from commit 345b4c30474f47a0477263bfba9894d7b4acda2d)
(cherry picked from commit dcd779cd668802fb1ea12fefb4dc3f83f34e30a2)
2025-02-11 10:57:43 +00:00
Zhenchi
336b941113 feat: change puffin stager eviction policy (#5511)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-02-11 08:16:27 +00:00
yihong
de3f817596 fix: drop useless clone and for loop second (#5507)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-02-11 06:23:49 +00:00
ZonaHe
d094f48822 feat: update dashboard to v0.7.9 (#5508)
Co-authored-by: ZonaHex <ZonaHex@users.noreply.github.com>
2025-02-11 06:19:58 +00:00
yihong
342883e922 ci: safe ci using zizmor check (#5491)
* ci: safe ci using zizmor check

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: lines empty

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: delete useless code

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-02-11 02:38:14 +00:00
Zhenchi
5be81abba3 feat: add metadata method to puffin reader (#5501)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-02-10 09:14:54 +00:00
Zhenchi
c19ecd7ea2 refactor: change traversal order during index construction (#5498)
* refactor: change traversal order during index construction

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chain

Co-authored-by: jeremyhi <jiachun_feng@proton.me>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: jeremyhi <jiachun_feng@proton.me>
2025-02-10 06:31:35 +00:00
Ning Sun
15f4b10065 chore: revert "docs: add TM to logos" (#5495)
* Revert "docs: add TM to logos (#4789)"

This reverts commit caf5f2c7a5.

* chore: transparent
2025-02-10 04:00:59 +00:00
yihong
c100a2d1a6 fix: refactor pgkv using prepare_cache about 10% better (#5497)
fix: refactor pgkv using prepare_cache about 15% better

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-02-10 03:59:18 +00:00
yihong
ccb1978c98 fix: close issue #5466 by do not shortcut the drop command (#5467)
fix: close issue #5466 by not shortcutting the drop command; set it back to READY on failure

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-02-10 03:28:34 +00:00
Ning Sun
480b05c590 feat: pipeline dispatcher part 2: execution (#5409)
* fmt: correct format

* test: add negative tests

* feat: Add pipeline dispatching and execution output handling

* refactor: Enhance ingest function to correctly process original data values

custom table names during pipeline execution while optimizing the management of
transformed rows and multiple dispatched pipelines

* refactor: call greptime_identity with intermediate values

* fix: typo

* test: port tests to refactored apis

* refactor: adapt dryrun api call

* refactor: move pipeline execution code to a separated module

* refactor: update otlp pipeline execution path

* fmt: format imports

* fix: compilation

* fix: resolve residual issues

* refactor: address review comments

* chore: use btreemap as pipeline intermediate status trait modify

* refactor: update dispatcher to accept BTreeMap

* refactor: update identity pipeline

* refactor: use new input for pipeline

* chore: wip

* refactor: use updated prepare api

* refactor: improve error and header name

* feat: port flatten to new api

* chore: update pipeline api

* chore: fix transform and some pipeline test

* refactor: reimplement cmcd

* refactor: update csv processor

* fmt: update format

* chore: fix regex and dissect processor

* chore: fix test

* test: add integration test for http pipeline

* refactor: improve regex pipeline

* refactor: improve required field check

* refactor: rename table_part to table_suffix

* fix: resolve merge issue

---------

Co-authored-by: paomian <xpaomian@gmail.com>
2025-02-08 09:01:54 +00:00
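
A minimal sketch of the dispatcher idea from this PR: intermediate values travel as a BTreeMap, and a dispatcher rule routes a row to another pipeline plus a table suffix. All names here are hypothetical; only the BTreeMap intermediate form and the table_suffix naming come from the commit messages above.

use std::collections::BTreeMap;

type Row = BTreeMap<String, String>;

struct DispatcherRule {
    field: String,        // field inspected on each row
    value: String,        // value that triggers this rule
    pipeline: String,     // pipeline to execute next
    table_suffix: String, // appended to the target table name
}

fn dispatch<'a>(row: &Row, rules: &'a [DispatcherRule]) -> Option<&'a DispatcherRule> {
    rules
        .iter()
        .find(|rule| row.get(&rule.field).map(String::as_str) == Some(rule.value.as_str()))
}

fn main() {
    let mut row = Row::new();
    row.insert("app".into(), "nginx".into());

    let rules = vec![DispatcherRule {
        field: "app".into(),
        value: "nginx".into(),
        pipeline: "nginx_access_log".into(),
        table_suffix: "_nginx".into(),
    }];

    if let Some(rule) = dispatch(&row, &rules) {
        println!("run pipeline {} into table logs{}", rule.pipeline, rule.table_suffix);
    }
}
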
Ruihang Xia
0de0fd80b0 feat: move pipelines to the first-class endpoint (#5480)
* feat: move pipelines to the first-class endpoint

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change endpoints

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* prefix path with /

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update integration result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-08 03:46:31 +00:00
Yohan Wal
059cb6fdc3 feat: update topic-region map when create and drop table (#5423)
* feat: update topic-region map

* fix: parse topic correctly

* test: add unit test for raft engine wal

* Update src/common/meta/src/ddl/drop_table.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* test: fix unit tests

* test: fix unit tests

* chore: error handling and tests

* refactor: manage region-topic map in table_metadata_keys

* refactor: use WalOptions instead of String in deletion

* chore: revert unused change

* chore: follow review comments

* Apply suggestions from code review

Co-authored-by: jeremyhi <jiachun_feng@proton.me>

* chore: follow review comments

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
Co-authored-by: jeremyhi <jiachun_feng@proton.me>
2025-02-07 15:09:37 +00:00
jeremyhi
29218b5fe7 refactor!: unify the option names across all components part2 (#5476)
* refactor: part2, replace old options in doc yaml

* chore: remove deprecated options

* chore: update config.md

* fix: ut
2025-02-07 13:06:50 +00:00
discord9
59e6ec0395 chore: update pprof (#5488)
dep: update pprof
2025-02-07 11:43:40 +00:00
Lei, HUANG
79ee230f2a fix: cross compiling for aarch64 targets and allow customizing page size (#5487) 2025-02-07 11:21:16 +00:00
ozewr
0e4bd59fac build: Update Loki proto (#5484)
* build: mv loki-api to loki-proto

* fmt: fmt toml

* fix: loki-proto using rev

---------

Co-authored-by: wangrui <wangrui@baihai.ai>
2025-02-07 09:09:39 +00:00
Yingwen
6eccadbf73 fix: force recycle region dir after gc duration (#5485) 2025-02-07 08:39:04 +00:00
discord9
f29a1c56e9 fix: unquote flow_name in create flow expr (#5483)
* fix: unquote flow_name in create flow expr

* chore: per review

* fix: compat with older version
2025-02-07 08:26:14 +00:00
shuiyisong
88c3d331a1 refactor: otlp logs insertion (#5479)
* chore: add test for selector overlapping

* refactor: simplify otlp logs insertion

* fix: use layered extracted value array

* fix: wrong len

* chore: minor renaming and update

* chore: rename

* fix: clippy

* fix: typos

* chore: update test

* chore: address CR comment & update meter-deps version
2025-02-07 07:21:20 +00:00
yihong
79acc9911e fix: Delete statement not supported in metric engine close #4649 (#5473)
* fix: Delete statement not supported in metric engine close #4649

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: do not include Truncate; address review comments

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address comments

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address comment again

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-02-07 06:47:53 +00:00
Yingwen
0a169980b7 fix: lose decimal precision when using decimal type as tag (#5481)
* fix: replicate() of decimal vector lose precision

* test: add sqlness test

* test: drop table
2025-02-06 13:17:05 +00:00
Weny Xu
c80d2a3222 fix: introduce gc task for metadata store (#5461)
* fix: introduce gc task for metadata kvbackend

* refactor: refine KvbackendConfig

* chore: apply suggestions from CR
2025-02-06 12:12:43 +00:00
Ruihang Xia
116bdaf690 refactor: pull column filling logic out of mito worker loop (#5455)
* avoid duplicated req catagorisation

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* pull column filling up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fill columns instead of fill column

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add test with metadata

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-06 11:43:28 +00:00
Ruihang Xia
6341fb86c7 feat: write memtable in parallel (#5456)
* feat: write memtable in parallel

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* some comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unwrap

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* unwrap spawn result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* use FuturesUnordered

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-06 09:29:57 +00:00
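
A hedged sketch of the parallel write pattern named in this PR, draining spawned per-shard tasks with FuturesUnordered; the per-shard write body is a stand-in, and join results are propagated rather than ignored, matching the "unwrap spawn result" commit.

use futures::stream::{FuturesUnordered, StreamExt};

// One task per shard; completions are drained in whatever order they finish.
async fn parallel_write(shard_batches: Vec<Vec<u64>>) -> Result<usize, String> {
    let mut tasks = FuturesUnordered::new();
    for (shard, batch) in shard_batches.into_iter().enumerate() {
        tasks.push(tokio::spawn(async move {
            // ... encode and insert `batch` into memtable shard `shard` ...
            (shard, batch.len())
        }));
    }

    let mut total_rows = 0;
    while let Some(joined) = tasks.next().await {
        // A panicked task surfaces here as a JoinError instead of being lost.
        let (_shard, rows) = joined.map_err(|e| e.to_string())?;
        total_rows += rows;
    }
    Ok(total_rows)
}

#[tokio::main]
async fn main() {
    let total = parallel_write(vec![vec![1, 2], vec![3], vec![4, 5, 6]])
        .await
        .expect("all shard writes succeed");
    println!("wrote {total} rows");
}
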
Ruihang Xia
fa09e181be perf: optimize time series memtable ingestion (#5451)
* initialize with capacity

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* avoid collect

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* optimize zip

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* rename variable

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* ignore type checking in the upper level

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change to two-step capacity

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-06 09:12:29 +00:00
Zhenchi
ab4663ec2b feat: add vec_add function (#5471)
* feat: add vec_add function

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix unexpected utf8

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-02-06 06:48:50 +00:00
jeremyhi
fac22575aa refactor!: unify the option names across all components (#5457)
* refactor: rename grpc options

* refactor: make the arg clearly

* chore: comments on server_addr

* chore: fix test

* chore: remove the store_addr alias

* refactor: cli option rpc_server_addr

* chore: keep store-addr alias

* chore: by comment
2025-02-06 06:37:14 +00:00
Yingwen
0e249f69cd fix: don't transform Limit in TypeConversionRule, StringNormalizationRule and DistPlannerAnalyzer (#5472)
* fix: do not transform exprs in the limit plan

* chore: keep some logs for debug

* feat: workaround for limit in other rules

* test: add sqlness tests for offset 0

* chore: add fixme
2025-02-05 11:30:24 +00:00
yihong
5d1761f3e5 docs: fix memory perf command wrong (#5470)
* docs: fix memory perf command wrong

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address comments

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: better format

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: make macos right

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* docs: add jeprof install info

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Co-authored-by: evenyag <realevenyag@gmail.com>

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Co-authored-by: evenyag <realevenyag@gmail.com>
2025-02-05 10:45:51 +00:00
427 changed files with 24150 additions and 10635 deletions

View File

@@ -34,8 +34,8 @@ inputs:
required: true
push-latest-tag:
description: Whether to push the latest tag
required: false
default: 'true'
required: true
default: 'false'
runs:
using: composite
steps:
@@ -47,7 +47,11 @@ runs:
password: ${{ inputs.image-registry-password }}
- name: Set up qemu for multi-platform builds
uses: docker/setup-qemu-action@v2
uses: docker/setup-qemu-action@v3
with:
platforms: linux/amd64,linux/arm64
# The latest version will lead to segmentation fault.
image: tonistiigi/binfmt:qemu-v7.0.0-28
- name: Set up buildx
uses: docker/setup-buildx-action@v2

View File

@@ -22,8 +22,8 @@ inputs:
required: true
push-latest-tag:
description: Whether to push the latest tag
required: false
default: 'true'
required: true
default: 'false'
dev-mode:
description: Enable dev mode, only build standard greptime
required: false

View File

@@ -51,8 +51,8 @@ inputs:
required: true
upload-to-s3:
description: Upload to S3
required: false
default: 'true'
required: true
default: 'false'
artifacts-dir:
description: Directory to store artifacts
required: false
@@ -77,13 +77,21 @@ runs:
with:
path: ${{ inputs.artifacts-dir }}
- name: Install s5cmd
shell: bash
run: |
wget https://github.com/peak/s5cmd/releases/download/v2.3.0/s5cmd_2.3.0_Linux-64bit.tar.gz
tar -xzf s5cmd_2.3.0_Linux-64bit.tar.gz
sudo mv s5cmd /usr/local/bin/
sudo chmod +x /usr/local/bin/s5cmd
- name: Release artifacts to cn region
uses: nick-invision/retry@v2
if: ${{ inputs.upload-to-s3 == 'true' }}
env:
AWS_ACCESS_KEY_ID: ${{ inputs.aws-cn-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-cn-secret-access-key }}
AWS_DEFAULT_REGION: ${{ inputs.aws-cn-region }}
AWS_REGION: ${{ inputs.aws-cn-region }}
UPDATE_VERSION_INFO: ${{ inputs.update-version-info }}
with:
max_attempts: ${{ inputs.upload-max-retry-times }}

View File

@@ -33,7 +33,7 @@ function upload_artifacts() {
# ├── greptime-darwin-amd64-v0.2.0.sha256sum
# └── greptime-darwin-amd64-v0.2.0.tar.gz
find "$ARTIFACTS_DIR" -type f \( -name "*.tar.gz" -o -name "*.sha256sum" \) | while IFS= read -r file; do
aws s3 cp \
s5cmd cp \
"$file" "s3://$AWS_S3_BUCKET/$RELEASE_DIRS/$VERSION/$(basename "$file")"
done
}
@@ -45,7 +45,7 @@ function update_version_info() {
if [[ "$VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "Updating latest-version.txt"
echo "$VERSION" > latest-version.txt
aws s3 cp \
s5cmd cp \
latest-version.txt "s3://$AWS_S3_BUCKET/$RELEASE_DIRS/latest-version.txt"
fi
@@ -53,7 +53,7 @@ function update_version_info() {
if [[ "$VERSION" == *"nightly"* ]]; then
echo "Updating latest-nightly-version.txt"
echo "$VERSION" > latest-nightly-version.txt
aws s3 cp \
s5cmd cp \
latest-nightly-version.txt "s3://$AWS_S3_BUCKET/$RELEASE_DIRS/latest-nightly-version.txt"
fi
fi

View File

@@ -17,6 +17,8 @@ jobs:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -12,6 +12,8 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
persist-credentials: false
- name: Set up Rust
uses: actions-rust-lang/setup-rust-toolchain@v1

View File

@@ -76,15 +76,9 @@ env:
NIGHTLY_RELEASE_PREFIX: nightly
# Use the different image name to avoid conflict with the release images.
IMAGE_NAME: greptimedb-dev
# The source code will check out in the following path: '${WORKING_DIR}/dev/greptime'.
CHECKOUT_GREPTIMEDB_PATH: dev/greptimedb
permissions:
issues: write
jobs:
allocate-runners:
name: Allocate runners
@@ -107,6 +101,7 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Create version
id: create-version
@@ -161,6 +156,7 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Checkout greptimedb
uses: actions/checkout@v4
@@ -168,6 +164,7 @@ jobs:
repository: ${{ inputs.repository }}
ref: ${{ inputs.commit }}
path: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
persist-credentials: true
- uses: ./.github/actions/build-linux-artifacts
with:
@@ -192,6 +189,7 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Checkout greptimedb
uses: actions/checkout@v4
@@ -199,6 +197,7 @@ jobs:
repository: ${{ inputs.repository }}
ref: ${{ inputs.commit }}
path: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
persist-credentials: true
- uses: ./.github/actions/build-linux-artifacts
with:
@@ -226,13 +225,14 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Build and push images to dockerhub
uses: ./.github/actions/build-images
with:
image-registry: docker.io
image-namespace: ${{ vars.IMAGE_NAMESPACE }}
image-name: ${{ env.IMAGE_NAME }}
image-name: ${{ vars.DEV_BUILD_IMAGE_NAME }}
image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }}
@@ -257,13 +257,14 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Release artifacts to CN region
uses: ./.github/actions/release-cn-artifacts
with:
src-image-registry: docker.io
src-image-namespace: ${{ vars.IMAGE_NAMESPACE }}
src-image-name: ${{ env.IMAGE_NAME }}
src-image-name: ${{ vars.DEV_BUILD_IMAGE_NAME }}
dst-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
dst-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
dst-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
@@ -273,6 +274,7 @@ jobs:
aws-cn-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
upload-to-s3: false
dev-mode: true # Only build the standard images(exclude centos images).
push-latest-tag: false # Don't push the latest tag to registry.
update-version-info: false # Don't update the version info in S3.
@@ -291,6 +293,7 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
@@ -316,6 +319,7 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
@@ -334,10 +338,16 @@ jobs:
release-images-to-dockerhub
]
runs-on: ubuntu-20.04
permissions:
issues: write
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Report CI status
id: report-ci-status

View File

@@ -26,6 +26,8 @@ jobs:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: crate-ci/typos@master
- name: Check the config docs
run: |
@@ -38,6 +40,8 @@ jobs:
name: Check License Header
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: korandoru/hawkeye@v5
check:
@@ -49,6 +53,8 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -70,6 +76,8 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: actions-rust-lang/setup-rust-toolchain@v1
- name: Install taplo
run: cargo +stable install taplo-cli --version ^0.9 --locked --force
@@ -85,6 +93,8 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -139,6 +149,8 @@ jobs:
echo "Disk space after:"
df -h
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -192,6 +204,8 @@ jobs:
echo "Disk space after:"
df -h
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -238,6 +252,8 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -295,6 +311,8 @@ jobs:
echo "Disk space after:"
df -h
- uses: actions/checkout@v4
with:
persist-credentials: false
- name: Setup Kind
uses: ./.github/actions/setup-kind
- if: matrix.mode.minio
@@ -437,6 +455,8 @@ jobs:
echo "Disk space after:"
df -h
- uses: actions/checkout@v4
with:
persist-credentials: false
- name: Setup Kind
uses: ./.github/actions/setup-kind
- name: Setup Chaos Mesh
@@ -562,6 +582,8 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- if: matrix.mode.kafka
name: Setup kafka server
working-directory: tests-integration/fixtures
@@ -589,6 +611,8 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -604,6 +628,8 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -626,6 +652,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- name: Merge Conflict Finder
uses: olivernybroe/action-conflict-finder@v4.0
@@ -636,6 +664,8 @@ jobs:
needs: [conflict-check, clippy, fmt]
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -684,6 +714,8 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -3,16 +3,21 @@ on:
pull_request_target:
types: [opened, edited]
permissions:
pull-requests: write
contents: read
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
docbot:
runs-on: ubuntu-20.04
permissions:
pull-requests: write
contents: read
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Maybe Follow Up Docs Issue
working-directory: cyborg

View File

@@ -34,6 +34,8 @@ jobs:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: crate-ci/typos@master
license-header-check:
@@ -41,6 +43,8 @@ jobs:
name: Check License Header
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: korandoru/hawkeye@v5
check:

View File

@@ -66,13 +66,6 @@ env:
NIGHTLY_RELEASE_PREFIX: nightly
# Use the different image name to avoid conflict with the release images.
# The DockerHub image will be greptime/greptimedb-nightly.
IMAGE_NAME: greptimedb-nightly
permissions:
issues: write
jobs:
allocate-runners:
name: Allocate runners
@@ -95,6 +88,7 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Create version
id: create-version
@@ -147,6 +141,7 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/build-linux-artifacts
with:
@@ -168,6 +163,7 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/build-linux-artifacts
with:
@@ -193,17 +189,18 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Build and push images to dockerhub
uses: ./.github/actions/build-images
with:
image-registry: docker.io
image-namespace: ${{ vars.IMAGE_NAMESPACE }}
image-name: ${{ env.IMAGE_NAME }}
image-name: ${{ vars.NIGHTLY_BUILD_IMAGE_NAME }}
image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: true
push-latest-tag: false
- name: Set nightly build result
id: set-nightly-build-result
@@ -226,13 +223,14 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Release artifacts to CN region
uses: ./.github/actions/release-cn-artifacts
with:
src-image-registry: docker.io
src-image-namespace: ${{ vars.IMAGE_NAMESPACE }}
src-image-name: ${{ env.IMAGE_NAME }}
src-image-name: ${{ vars.NIGHTLY_BUILD_IMAGE_NAME }}
dst-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
dst-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
dst-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
@@ -242,9 +240,10 @@ jobs:
aws-cn-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
upload-to-s3: false
dev-mode: false
update-version-info: false # Don't update version info in S3.
push-latest-tag: true
push-latest-tag: false
stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
name: Stop linux-amd64 runner
@@ -260,6 +259,7 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
@@ -285,6 +285,7 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
@@ -303,10 +304,14 @@ jobs:
release-images-to-dockerhub
]
runs-on: ubuntu-20.04
permissions:
issues: write
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Report CI status
id: report-ci-status

View File

@@ -9,9 +9,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
issues: write
jobs:
sqlness-test:
name: Run sqlness test
@@ -22,6 +19,7 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Check install.sh
run: ./.github/scripts/check-install-script.sh
@@ -46,9 +44,14 @@ jobs:
name: Sqlness tests on Windows
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: windows-2022-8-cores
permissions:
issues: write
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- uses: arduino/setup-protoc@v3
with:
@@ -76,6 +79,9 @@ jobs:
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- uses: arduino/setup-protoc@v3
with:
@@ -111,9 +117,13 @@ jobs:
cleanbuild-linux-nix:
name: Run clean build on Linux
runs-on: ubuntu-latest
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: cachix/install-nix-action@v27
with:
nix_path: nixpkgs=channel:nixos-24.11
@@ -141,6 +151,9 @@ jobs:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Report CI status
id: report-ci-status

View File

@@ -37,6 +37,7 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Configure build image version
id: set-version
@@ -85,48 +86,66 @@ jobs:
- name: Push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.release_dev_builder_ubuntu_image }}
env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
ECR_IMAGE_REGISTRY: ${{ vars.ECR_IMAGE_REGISTRY }}
ECR_IMAGE_NAMESPACE: ${{ vars.ECR_IMAGE_NAMESPACE }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }}
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-ubuntu:$IMAGE_VERSION \
docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-ubuntu:$IMAGE_VERSION
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:latest \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-ubuntu:latest
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-ubuntu:latest \
docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-ubuntu:latest
- name: Push dev-builder-centos image
shell: bash
if: ${{ inputs.release_dev_builder_centos_image }}
env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
ECR_IMAGE_REGISTRY: ${{ vars.ECR_IMAGE_REGISTRY }}
ECR_IMAGE_NAMESPACE: ${{ vars.ECR_IMAGE_NAMESPACE }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }}
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-centos:$IMAGE_VERSION \
docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-centos:$IMAGE_VERSION
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:latest \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-centos:latest
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-centos:latest \
docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-centos:latest
- name: Push dev-builder-android image
shell: bash
if: ${{ inputs.release_dev_builder_android_image }}
env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
ECR_IMAGE_REGISTRY: ${{ vars.ECR_IMAGE_REGISTRY }}
ECR_IMAGE_NAMESPACE: ${{ vars.ECR_IMAGE_NAMESPACE }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }}
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION \
docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:latest \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-android:latest
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-android:latest \
docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-android:latest
release-dev-builder-images-cn: # Note: Be careful of issue https://github.com/containers/skopeo/issues/1874; we decided to use the latest stable skopeo container.
name: Release dev builder images to CN region
runs-on: ubuntu-20.04
@@ -144,29 +163,41 @@ jobs:
- name: Push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.release_dev_builder_ubuntu_image }}
env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
ACR_IMAGE_REGISTRY: ${{ vars.ACR_IMAGE_REGISTRY }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }}
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-ubuntu:$IMAGE_VERSION \
docker://$ACR_IMAGE_REGISTRY/$IMAGE_NAMESPACE/dev-builder-ubuntu:$IMAGE_VERSION
- name: Push dev-builder-centos image
shell: bash
if: ${{ inputs.release_dev_builder_centos_image }}
env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
ACR_IMAGE_REGISTRY: ${{ vars.ACR_IMAGE_REGISTRY }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }}
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-centos:$IMAGE_VERSION \
docker://$ACR_IMAGE_REGISTRY/$IMAGE_NAMESPACE/dev-builder-centos:$IMAGE_VERSION
- name: Push dev-builder-android image
shell: bash
if: ${{ inputs.release_dev_builder_android_image }}
env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
ACR_IMAGE_REGISTRY: ${{ vars.ACR_IMAGE_REGISTRY }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }}
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION \
docker://$ACR_IMAGE_REGISTRY/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION

View File

@@ -93,11 +93,6 @@ env:
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.12.0
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
issues: write # Allows the action to create issues for cyborg.
contents: write # Allows the action to create a release.
jobs:
allocate-runners:
name: Allocate runners
@@ -122,6 +117,7 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Check Rust toolchain version
shell: bash
@@ -181,6 +177,7 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/build-linux-artifacts
with:
@@ -202,6 +199,7 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/build-linux-artifacts
with:
@@ -237,6 +235,7 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/build-macos-artifacts
with:
@@ -276,6 +275,7 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/build-windows-artifacts
with:
@@ -306,15 +306,18 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Build and push images to dockerhub
uses: ./.github/actions/build-images
with:
image-registry: docker.io
image-namespace: ${{ vars.IMAGE_NAMESPACE }}
image-name: ${{ vars.GREPTIMEDB_IMAGE_NAME }}
image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: true
- name: Set build image result
id: set-build-image-result
@@ -341,13 +344,14 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Release artifacts to CN region
uses: ./.github/actions/release-cn-artifacts
with:
src-image-registry: docker.io
src-image-namespace: ${{ vars.IMAGE_NAMESPACE }}
src-image-name: greptimedb
src-image-name: ${{ vars.GREPTIMEDB_IMAGE_NAME }}
dst-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
dst-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
dst-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
@@ -358,6 +362,7 @@ jobs:
aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
dev-mode: false
upload-to-s3: true
update-version-info: true
push-latest-tag: true
@@ -377,6 +382,7 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Publish GitHub release
uses: ./.github/actions/publish-github-release
@@ -400,6 +406,7 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
@@ -425,6 +432,7 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
@@ -441,8 +449,15 @@ jobs:
if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [allocate-runners]
runs-on: ubuntu-20.04
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
issues: write # Allows the action to create issues for cyborg.
contents: write # Allows the action to create a release.
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Bump doc version
working-directory: cyborg
@@ -461,10 +476,17 @@ jobs:
build-windows-artifacts,
]
runs-on: ubuntu-20.04
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
issues: write # Allows the action to create issues for cyborg.
contents: write # Allows the action to create a release.
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Report CI status
id: report-ci-status

View File

@@ -4,18 +4,20 @@ on:
- cron: '4 2 * * *'
workflow_dispatch:
permissions:
contents: read
issues: write
pull-requests: write
jobs:
maintenance:
name: Periodic Maintenance
runs-on: ubuntu-latest
permissions:
contents: read
issues: write
pull-requests: write
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Do Maintenance
working-directory: cyborg

View File

@@ -1,18 +1,24 @@
name: "Semantic Pull Request"
on:
pull_request_target:
pull_request:
types:
- opened
- reopened
- edited
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
check:
runs-on: ubuntu-20.04
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Check Pull Request
working-directory: cyborg

View File

@@ -3,30 +3,28 @@
## Individual Committers (in alphabetical order)
* [CookiePieWw](https://github.com/CookiePieWw)
* [KKould](https://github.com/KKould)
* [NiwakaDev](https://github.com/NiwakaDev)
* [etolbakov](https://github.com/etolbakov)
* [irenjj](https://github.com/irenjj)
* [tisonkun](https://github.com/tisonkun)
* [KKould](https://github.com/KKould)
* [Lanqing Yang](https://github.com/lyang24)
* [NiwakaDev](https://github.com/NiwakaDev)
* [tisonkun](https://github.com/tisonkun)
## Team Members (in alphabetical order)
* [Breeze-P](https://github.com/Breeze-P)
* [GrepTime](https://github.com/GrepTime)
* [MichaelScofield](https://github.com/MichaelScofield)
* [Wenjie0329](https://github.com/Wenjie0329)
* [WenyXu](https://github.com/WenyXu)
* [ZonaHex](https://github.com/ZonaHex)
* [apdong2022](https://github.com/apdong2022)
* [beryl678](https://github.com/beryl678)
* [Breeze-P](https://github.com/Breeze-P)
* [daviderli614](https://github.com/daviderli614)
* [discord9](https://github.com/discord9)
* [evenyag](https://github.com/evenyag)
* [fengjiachun](https://github.com/fengjiachun)
* [fengys1996](https://github.com/fengys1996)
* [GrepTime](https://github.com/GrepTime)
* [holalengyu](https://github.com/holalengyu)
* [killme2008](https://github.com/killme2008)
* [MichaelScofield](https://github.com/MichaelScofield)
* [nicecui](https://github.com/nicecui)
* [paomian](https://github.com/paomian)
* [shuiyisong](https://github.com/shuiyisong)
@@ -34,11 +32,14 @@
* [sunng87](https://github.com/sunng87)
* [v0y4g3r](https://github.com/v0y4g3r)
* [waynexia](https://github.com/waynexia)
* [Wenjie0329](https://github.com/Wenjie0329)
* [WenyXu](https://github.com/WenyXu)
* [xtang](https://github.com/xtang)
* [zhaoyingnan01](https://github.com/zhaoyingnan01)
* [zhongzc](https://github.com/zhongzc)
* [ZonaHex](https://github.com/ZonaHex)
* [zyy17](https://github.com/zyy17)
## All Contributors
[![All Contributors](https://contrib.rocks/image?repo=GreptimeTeam/greptimedb)](https://github.com/GreptimeTeam/greptimedb/graphs/contributors)
To see the full list of contributors, please visit our [Contributors page](https://github.com/GreptimeTeam/greptimedb/graphs/contributors)

Cargo.lock (generated, 809 changes) — file diff suppressed because it is too large.

View File

@@ -81,6 +81,7 @@ rust.unknown_lints = "deny"
rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
[workspace.dependencies]
# DO_NOT_REMOVE_THIS: BEGIN_OF_EXTERNAL_DEPENDENCIES
# We turn off default-features for some dependencies here so the workspaces which inherit them can
# selectively turn them on if needed, since we can override default-features = true (from false)
# for the inherited dependency but cannot do the reverse (override from true to false).
@@ -106,6 +107,7 @@ bitflags = "2.4.1"
bytemuck = "1.12"
bytes = { version = "1.7", features = ["serde"] }
chrono = { version = "0.4", features = ["serde"] }
chrono-tz = "0.10.1"
clap = { version = "4.4", features = ["derive"] }
config = "0.13.0"
crossbeam-utils = "0.8"
@@ -127,7 +129,7 @@ etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "683e9d10ae7f3dfb8aaabd89082fc600c17e3795" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "072ce580502e015df1a6b03a185b60309a7c2a7a" }
hex = "0.4"
http = "1"
humantime = "2.1"
@@ -138,8 +140,8 @@ itertools = "0.10"
jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "8c8d2fc294a39f3ff08909d60f718639cfba3875", default-features = false }
lazy_static = "1.4"
local-ip-address = "0.6"
loki-api = { git = "https://github.com/shuiyisong/tracing-loki", branch = "chore/prost_version" }
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "a10facb353b41460eeb98578868ebf19c2084fac" }
loki-proto = { git = "https://github.com/GreptimeTeam/loki-proto.git", rev = "1434ecf23a2654025d86188fb5205e7a74b225d3" }
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "5618e779cf2bb4755b499c630fba4c35e91898cb" }
mockall = "0.11.4"
moka = "0.12"
nalgebra = "0.33"
@@ -158,7 +160,9 @@ parquet = { version = "53.0.0", default-features = false, features = ["arrow", "
paste = "1.0"
pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] }
promql-parser = { version = "0.4.3", features = ["ser"] }
promql-parser = { git = "https://github.com/GreptimeTeam/promql-parser.git", features = [
"ser",
], rev = "27abb8e16003a50c720f00d6c85f41f5fa2a2a8e" }
prost = "0.13"
raft-engine = { version = "0.4.1", default-features = false }
rand = "0.8"
@@ -207,6 +211,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"]
typetag = "0.2"
uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
zstd = "0.13"
# DO_NOT_REMOVE_THIS: END_OF_EXTERNAL_DEPENDENCIES
## workspaces members
api = { path = "src/api" }
@@ -278,12 +283,10 @@ tokio-rustls = { git = "https://github.com/GreptimeTeam/tokio-rustls", rev = "46
# This is commented, since we are not using aws-lc-sys, if we need to use it, we need to uncomment this line or use a release after this commit, or it wouldn't compile with gcc < 8.1
# see https://github.com/aws/aws-lc-rs/pull/526
# aws-lc-sys = { git ="https://github.com/aws/aws-lc-rs", rev = "556558441e3494af4b156ae95ebc07ebc2fd38aa" }
# Apply a fix for pprof for unaligned pointer access
pprof = { git = "https://github.com/GreptimeTeam/pprof-rs", rev = "1bd1e21" }
[workspace.dependencies.meter-macros]
git = "https://github.com/GreptimeTeam/greptime-meter.git"
rev = "a10facb353b41460eeb98578868ebf19c2084fac"
rev = "5618e779cf2bb4755b499c630fba4c35e91898cb"
[profile.release]
debug = 1

View File

@@ -1,3 +1,6 @@
[target.aarch64-unknown-linux-gnu]
image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:0.2.5"
[build]
pre-build = [
"dpkg --add-architecture $CROSS_DEB_ARCH",
@@ -5,3 +8,8 @@ pre-build = [
"curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip && unzip protoc-3.15.8-linux-x86_64.zip -d /usr/",
"chmod a+x /usr/bin/protoc && chmod -R a+rx /usr/include/google",
]
[build.env]
passthrough = [
"JEMALLOC_SYS_WITH_LG_PAGE",
]
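
The new `[build.env]` passthrough only takes effect when the variable is actually exported on the host; a hedged sketch of how it could be used with `cross` (the page-size value and the release profile are illustrative, not part of this change):

```bash
# JEMALLOC_SYS_WITH_LG_PAGE is forwarded into the cross build container by the
# passthrough entry above; 16 (64 KiB pages) is only an illustrative value.
JEMALLOC_SYS_WITH_LG_PAGE=16 cross build --target aarch64-unknown-linux-gnu --release
```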

View File

@@ -116,7 +116,7 @@ docker run -p 127.0.0.1:4000-4003:4000-4003 \
--name greptime --rm \
greptime/greptimedb:latest standalone start \
--http-addr 0.0.0.0:4000 \
--rpc-addr 0.0.0.0:4001 \
--rpc-bind-addr 0.0.0.0:4001 \
--mysql-addr 0.0.0.0:4002 \
--postgres-addr 0.0.0.0:4003
```
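
Once the container is up, one way to confirm it is serving is to probe the HTTP port mapped above (a hedged sketch; the `/health` endpoint is assumed from GreptimeDB's HTTP API and is not part of this change):

```bash
# the HTTP server was mapped to host port 4000 in the docker run command above
curl -i http://127.0.0.1:4000/health
```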

View File

@@ -29,7 +29,7 @@
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default.<br/>This allows browsers to access HTTP APIs without CORS restrictions. |
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
| `grpc.tls.mode` | String | `disable` | TLS mode. |
@@ -40,6 +40,7 @@
| `mysql.enable` | Bool | `true` | Whether to enable. |
| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
| `mysql.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
| `mysql.tls` | -- | -- | -- |
| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
| `mysql.tls.cert_path` | String | Unset | Certificate file path. |
@@ -49,6 +50,7 @@
| `postgres.enable` | Bool | `true` | Whether to enable |
| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgresSQL server. |
| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
| `postgres.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
| `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql.tls` section. |
| `postgres.tls.mode` | String | `disable` | TLS mode. |
| `postgres.tls.cert_path` | String | Unset | Certificate file path. |
@@ -58,6 +60,8 @@
| `opentsdb.enable` | Bool | `true` | Whether to enable OpenTSDB put in HTTP API. |
| `influxdb` | -- | -- | InfluxDB protocol options. |
| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
| `jaeger` | -- | -- | Jaeger protocol options. |
| `jaeger.enable` | Bool | `true` | Whether to enable Jaeger protocol in HTTP API. |
| `prom_store` | -- | -- | Prometheus remote storage options |
| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
@@ -65,8 +69,8 @@
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
| `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `128MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a purge.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `1m` | The interval to trigger a purge.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
@@ -88,8 +92,9 @@
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
| `metadata_store` | -- | -- | Metadata storage options. |
| `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. |
| `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. |
| `metadata_store.file_size` | String | `64MB` | The size of the metadata store log file. |
| `metadata_store.purge_threshold` | String | `256MB` | The threshold of the metadata store size to trigger a purge. |
| `metadata_store.purge_interval` | String | `1m` | The interval of the metadata store to trigger a purge. |
| `procedure` | -- | -- | Procedure storage options. |
| `procedure.max_retry_times` | Integer | `3` | Procedure max retry time. |
| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
@@ -147,6 +152,7 @@
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
| `region_engine.mito.index.aux_path` | String | `""` | Auxiliary directory path for the index in filesystem, used to store intermediate files for<br/>creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.<br/>The default name for this directory is `index_intermediate` for backward compatibility.<br/><br/>This path contains two subdirectories:<br/>- `__intm`: for storing intermediate files used during creating index.<br/>- `staging`: for storing staging files used during searching index. |
| `region_engine.mito.index.staging_size` | String | `2GB` | The max capacity of the staging directory. |
| `region_engine.mito.index.staging_ttl` | String | `7d` | The TTL of the staging directory.<br/>Defaults to 7 days.<br/>Setting it to "0s" to disable TTL. |
| `region_engine.mito.index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.index.content_cache_page_size` | String | `64KiB` | Page size for inverted index content cache. |
@@ -221,8 +227,8 @@
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default.<br/>This allows browsers to access HTTP APIs without CORS restrictions. |
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.hostname` | String | `127.0.0.1:4001` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.server_addr` | String | `127.0.0.1:4001` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
| `grpc.tls.mode` | String | `disable` | TLS mode. |
@@ -233,6 +239,7 @@
| `mysql.enable` | Bool | `true` | Whether to enable. |
| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
| `mysql.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
| `mysql.tls` | -- | -- | -- |
| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
| `mysql.tls.cert_path` | String | Unset | Certificate file path. |
@@ -242,6 +249,7 @@
| `postgres.enable` | Bool | `true` | Whether to enable |
| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgresSQL server. |
| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
| `postgres.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
| `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql.tls` section. |
| `postgres.tls.mode` | String | `disable` | TLS mode. |
| `postgres.tls.cert_path` | String | Unset | Certificate file path. |
@@ -251,6 +259,8 @@
| `opentsdb.enable` | Bool | `true` | Whether to enable OpenTSDB put in HTTP API. |
| `influxdb` | -- | -- | InfluxDB protocol options. |
| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
| `jaeger` | -- | -- | Jaeger protocol options. |
| `jaeger.enable` | Bool | `true` | Whether to enable Jaeger protocol in HTTP API. |
| `prom_store` | -- | -- | Prometheus remote storage options |
| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
@@ -300,7 +310,7 @@
| --- | -----| ------- | ----------- |
| `data_home` | String | `/tmp/metasrv/` | The working home directory. |
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for the frontend and datanode to connect to metasrv.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `bind_addr`. |
| `store_addrs` | Array | -- | Store server address default to etcd store.<br/>For postgres store, the format is:<br/>"password=password dbname=postgres user=postgres host=localhost port=5432"<br/>For etcd store, the format is:<br/>"127.0.0.1:2379" |
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
| `backend` | String | `etcd_store` | The datastore for meta server.<br/>Available values:<br/>- `etcd_store` (default value)<br/>- `memory_store`<br/>- `postgres_store` |
@@ -309,6 +319,7 @@
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
| `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
@@ -376,19 +387,14 @@
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
| `max_concurrent_queries` | Integer | `0` | The maximum concurrent queries allowed to be executed. Zero means unlimited. |
| `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. |
| `rpc_hostname` | String | Unset | Deprecated, use `grpc.hostname` instead. |
| `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. |
| `rpc_max_recv_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_recv_message_size` instead. |
| `rpc_max_send_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_send_message_size` instead. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
| `http` | -- | -- | The HTTP server options. |
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.addr` | String | `127.0.0.1:3001` | The address to bind the gRPC server. |
| `grpc.hostname` | String | `127.0.0.1:3001` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
| `grpc.bind_addr` | String | `127.0.0.1:3001` | The address to bind the gRPC server. |
| `grpc.server_addr` | String | `127.0.0.1:3001` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
| `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
@@ -487,6 +493,7 @@
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
| `region_engine.mito.index.aux_path` | String | `""` | Auxiliary directory path for the index in filesystem, used to store intermediate files for<br/>creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.<br/>The default name for this directory is `index_intermediate` for backward compatibility.<br/><br/>This path contains two subdirectories:<br/>- `__intm`: for storing intermediate files used during creating index.<br/>- `staging`: for storing staging files used during searching index. |
| `region_engine.mito.index.staging_size` | String | `2GB` | The max capacity of the staging directory. |
| `region_engine.mito.index.staging_ttl` | String | `7d` | The TTL of the staging directory.<br/>Defaults to 7 days.<br/>Setting it to "0s" to disable TTL. |
| `region_engine.mito.index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.index.content_cache_page_size` | String | `64KiB` | Page size for inverted index content cache. |
@@ -549,8 +556,8 @@
| `flow` | -- | -- | flow engine options. |
| `flow.num_workers` | Integer | `0` | The number of flow workers in flownode.<br/>If not set (or set to 0), half the number of CPU cores is used. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.addr` | String | `127.0.0.1:6800` | The address to bind the gRPC server. |
| `grpc.hostname` | String | `127.0.0.1` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
| `grpc.bind_addr` | String | `127.0.0.1:6800` | The address to bind the gRPC server. |
| `grpc.server_addr` | String | `127.0.0.1:6800` | The address advertised to the metasrv,<br/>and used for connections from outside the host |
| `grpc.runtime_size` | Integer | `2` | The number of server worker threads. |
| `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
| `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |

View File

@@ -19,26 +19,6 @@ init_regions_parallelism = 16
## The maximum concurrent queries allowed to be executed. Zero means unlimited.
max_concurrent_queries = 0
## Deprecated, use `grpc.addr` instead.
## @toml2docs:none-default
rpc_addr = "127.0.0.1:3001"
## Deprecated, use `grpc.hostname` instead.
## @toml2docs:none-default
rpc_hostname = "127.0.0.1"
## Deprecated, use `grpc.runtime_size` instead.
## @toml2docs:none-default
rpc_runtime_size = 8
## Deprecated, use `grpc.rpc_max_recv_message_size` instead.
## @toml2docs:none-default
rpc_max_recv_message_size = "512MB"
## Deprecated, use `grpc.rpc_max_send_message_size` instead.
## @toml2docs:none-default
rpc_max_send_message_size = "512MB"
## Enable telemetry to collect anonymous usage data. Enabled by default.
#+ enable_telemetry = true
@@ -56,10 +36,11 @@ body_limit = "64MB"
## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
addr = "127.0.0.1:3001"
## The hostname advertised to the metasrv,
## and used for connections from outside the host
hostname = "127.0.0.1:3001"
bind_addr = "127.0.0.1:3001"
## The address advertised to the metasrv, and used for connections from outside the host.
## If left empty or unset, the server will automatically use the IP address of the first network interface
## on the host, with the same port number as the one specified in `grpc.bind_addr`.
server_addr = "127.0.0.1:3001"
## The number of server worker threads.
runtime_size = 8
## The maximum receive message size for gRPC server.
@@ -516,6 +497,11 @@ aux_path = ""
## The max capacity of the staging directory.
staging_size = "2GB"
## The TTL of the staging directory.
## Defaults to 7 days.
## Setting it to "0s" to disable TTL.
staging_ttl = "7d"
## Cache size for inverted index metadata.
metadata_cache_size = "64MiB"

View File

@@ -14,10 +14,10 @@ node_id = 14
## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
addr = "127.0.0.1:6800"
## The hostname advertised to the metasrv,
bind_addr = "127.0.0.1:6800"
## The address advertised to the metasrv,
## and used for connections from outside the host
hostname = "127.0.0.1"
server_addr = "127.0.0.1:6800"
## The number of server worker threads.
runtime_size = 2
## The maximum receive message size for gRPC server.

View File

@@ -41,10 +41,11 @@ cors_allowed_origins = ["https://example.com"]
## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
addr = "127.0.0.1:4001"
## The hostname advertised to the metasrv,
## and used for connections from outside the host
hostname = "127.0.0.1:4001"
bind_addr = "127.0.0.1:4001"
## The address advertised to the metasrv, and used for connections from outside the host.
## If left empty or unset, the server will automatically use the IP address of the first network interface
## on the host, with the same port number as the one specified in `grpc.bind_addr`.
server_addr = "127.0.0.1:4001"
## The number of server worker threads.
runtime_size = 8
@@ -73,6 +74,9 @@ enable = true
addr = "127.0.0.1:4002"
## The number of server worker threads.
runtime_size = 2
## Server-side keep-alive time.
## Set to 0 (default) to disable.
keep_alive = "0s"
# MySQL server TLS options.
[mysql.tls]
@@ -104,6 +108,9 @@ enable = true
addr = "127.0.0.1:4003"
## The number of server worker threads.
runtime_size = 2
## Server-side keep-alive time.
## Set to 0 (default) to disable.
keep_alive = "0s"
## PostgresSQL server TLS options, see `mysql.tls` section.
[postgres.tls]
@@ -131,6 +138,11 @@ enable = true
## Whether to enable InfluxDB protocol in HTTP API.
enable = true
## Jaeger protocol options.
[jaeger]
## Whether to enable Jaeger protocol in HTTP API.
enable = true
## Prometheus remote storage options
[prom_store]
## Whether to enable Prometheus remote write and read in HTTP API.

View File

@@ -4,7 +4,9 @@ data_home = "/tmp/metasrv/"
## The bind address of metasrv.
bind_addr = "127.0.0.1:3002"
## The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
## The communication server address for the frontend and datanode to connect to metasrv.
## If left empty or unset, the server will automatically use the IP address of the first network interface
## on the host, with the same port number as the one specified in `bind_addr`.
server_addr = "127.0.0.1:3002"
## Store server address default to etcd store.
@@ -48,6 +50,9 @@ use_memory_store = false
## - Using shared storage (e.g., s3).
enable_region_failover = false
## Max allowed idle time before removing node info from metasrv memory.
node_max_idle_time = "24hours"
## Whether to enable greptimedb telemetry. Enabled by default.
#+ enable_telemetry = true

View File

@@ -49,7 +49,7 @@ cors_allowed_origins = ["https://example.com"]
## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
addr = "127.0.0.1:4001"
bind_addr = "127.0.0.1:4001"
## The number of server worker threads.
runtime_size = 8
@@ -78,6 +78,9 @@ enable = true
addr = "127.0.0.1:4002"
## The number of server worker threads.
runtime_size = 2
## Server-side keep-alive time.
## Set to 0 (default) to disable.
keep_alive = "0s"
# MySQL server TLS options.
[mysql.tls]
@@ -109,6 +112,9 @@ enable = true
addr = "127.0.0.1:4003"
## The number of server worker threads.
runtime_size = 2
## Server-side keep-alive time.
## Set to 0 (default) to disable.
keep_alive = "0s"
## PostgresSQL server TLS options, see `mysql.tls` section.
[postgres.tls]
@@ -136,6 +142,11 @@ enable = true
## Whether to enable InfluxDB protocol in HTTP API.
enable = true
## Jaeger protocol options.
[jaeger]
## Whether to enable Jaeger protocol in HTTP API.
enable = true
## Prometheus remote storage options
[prom_store]
## Whether to enable Prometheus remote write and read in HTTP API.
@@ -159,11 +170,11 @@ dir = "/tmp/greptimedb/wal"
## **It's only used when the provider is `raft_engine`**.
file_size = "128MB"
## The threshold of the WAL size to trigger a flush.
## The threshold of the WAL size to trigger a purge.
## **It's only used when the provider is `raft_engine`**.
purge_threshold = "1GB"
## The interval to trigger a flush.
## The interval to trigger a purge.
## **It's only used when the provider is `raft_engine`**.
purge_interval = "1m"
@@ -278,10 +289,12 @@ overwrite_entry_start_id = false
## Metadata storage options.
[metadata_store]
## Kv file size in bytes.
file_size = "256MB"
## Kv purge threshold.
purge_threshold = "4GB"
## The size of the metadata store log file.
file_size = "64MB"
## The threshold of the metadata store size to trigger a purge.
purge_threshold = "256MB"
## The interval of the metadata store to trigger a purge.
purge_interval = "1m"
## Procedure storage options.
[procedure]
@@ -571,6 +584,11 @@ aux_path = ""
## The max capacity of the staging directory.
staging_size = "2GB"
## The TTL of the staging directory.
## Defaults to 7 days.
## Setting it to "0s" to disable TTL.
staging_ttl = "7d"
## Cache size for inverted index metadata.
metadata_cache_size = "64MiB"

View File

@@ -43,8 +43,8 @@ services:
command:
- metasrv
- start
- --bind-addr=0.0.0.0:3002
- --server-addr=metasrv:3002
- --rpc-bind-addr=0.0.0.0:3002
- --rpc-server-addr=metasrv:3002
- --store-addrs=etcd0:2379
- --http-addr=0.0.0.0:3000
healthcheck:
@@ -68,8 +68,8 @@ services:
- datanode
- start
- --node-id=0
- --rpc-addr=0.0.0.0:3001
- --rpc-hostname=datanode0:3001
- --rpc-bind-addr=0.0.0.0:3001
- --rpc-server-addr=datanode0:3001
- --metasrv-addrs=metasrv:3002
- --http-addr=0.0.0.0:5000
volumes:
@@ -98,7 +98,7 @@ services:
- start
- --metasrv-addrs=metasrv:3002
- --http-addr=0.0.0.0:4000
- --rpc-addr=0.0.0.0:4001
- --rpc-bind-addr=0.0.0.0:4001
- --mysql-addr=0.0.0.0:4002
- --postgres-addr=0.0.0.0:4003
healthcheck:
@@ -123,8 +123,8 @@ services:
- start
- --node-id=0
- --metasrv-addrs=metasrv:3002
- --rpc-addr=0.0.0.0:4004
- --rpc-hostname=flownode0:4004
- --rpc-bind-addr=0.0.0.0:4004
- --rpc-server-addr=flownode0:4004
- --http-addr=0.0.0.0:4005
depends_on:
frontend0:
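
Outside Compose, the same renamed flags apply to the plain CLI; a hedged sketch for two of the services above (the `greptime` binary, subcommand names, and host names mirror this Compose file and are illustrative):

```bash
# metasrv: bind inside the container, advertise a name that peers can resolve
greptime metasrv start \
  --rpc-bind-addr=0.0.0.0:3002 \
  --rpc-server-addr=metasrv:3002 \
  --store-addrs=etcd0:2379 \
  --http-addr=0.0.0.0:3000

# flownode: same bind/advertise split, plus the metasrv endpoints it registers with
greptime flownode start \
  --node-id=0 \
  --metasrv-addrs=metasrv:3002 \
  --rpc-bind-addr=0.0.0.0:4004 \
  --rpc-server-addr=flownode0:4004 \
  --http-addr=0.0.0.0:4005
```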

View File

@@ -4,6 +4,16 @@ This crate provides an easy approach to dump memory profiling info.
## Prerequisites
### jemalloc
jeprof is already compiled in the target directory of GreptimeDB. You can find the binary and use it.
```
# find jeprof binary
find . -name 'jeprof'
# add executable permission
chmod +x <path_to_jeprof>
```
The path is usually under `./target/${PROFILE}/build/tikv-jemalloc-sys-${HASH}/out/build/bin/jeprof`.
The default version of jemalloc installed from the package manager may not have the `--collapsed` option.
You may need to check whether the `jeprof` version is >= `5.3.0` if you want to install it from the package manager.
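A quick way to verify this before relying on a given binary (a hedged sketch; `<path_to_jeprof>` is the path found above):
```bash
# should report the jemalloc release it ships with; --collapsed needs >= 5.3.0
<path_to_jeprof> --version
# a jeprof installed from a package manager can be checked the same way
jeprof --version
```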
```bash
# for macOS
brew install jemalloc
@@ -23,7 +33,11 @@ curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph
Start GreptimeDB instance with environment variables:
```bash
# for Linux
MALLOC_CONF=prof:true ./target/debug/greptime standalone start
# for macOS
_RJEM_MALLOC_CONF=prof:true ./target/debug/greptime standalone start
```
Dump memory profiling data through HTTP API:

Binary image file changed (content not shown): 36 KiB before, 25 KiB after.

BIN docs/logo-text-padding.png (Executable file → Normal file), binary content not shown: 25 KiB before, 21 KiB after.

File diff suppressed because it is too large.

View File

@@ -384,8 +384,8 @@
"rowHeight": 0.9,
"showValue": "auto",
"tooltip": {
"mode": "none",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -483,8 +483,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "10.2.3",
@@ -578,8 +578,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "10.2.3",
@@ -601,7 +601,7 @@
"type": "timeseries"
},
{
"collapsed": true,
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
@@ -684,8 +684,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -878,8 +878,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -1124,8 +1124,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -1223,8 +1223,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -1322,8 +1322,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -1456,8 +1456,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -1573,8 +1573,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -1673,8 +1673,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -1773,8 +1773,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -1890,8 +1890,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -2002,8 +2002,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -2120,8 +2120,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -2233,8 +2233,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -2334,8 +2334,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -2435,8 +2435,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -2548,8 +2548,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -2661,8 +2661,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -2788,8 +2788,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -2889,8 +2889,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -2990,8 +2990,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -3091,8 +3091,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -3191,8 +3191,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -3302,8 +3302,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -3432,8 +3432,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -3543,8 +3543,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -3657,8 +3657,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -3808,8 +3808,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -3909,8 +3909,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -4011,8 +4011,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [
@@ -4113,8 +4113,8 @@
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
"mode": "multi",
"sort": "desc"
}
},
"targets": [

View File

@@ -15,13 +15,10 @@ common-macro.workspace = true
common-time.workspace = true
datatypes.workspace = true
greptime-proto.workspace = true
paste = "1.0"
paste.workspace = true
prost.workspace = true
serde_json.workspace = true
snafu.workspace = true
[build-dependencies]
tonic-build = "0.11"
[dev-dependencies]
paste = "1.0"

View File

@@ -15,10 +15,10 @@
use std::collections::HashMap;
use datatypes::schema::{
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, COMMENT_KEY,
FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY,
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, SkippingIndexType,
COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY,
};
use greptime_proto::v1::Analyzer;
use greptime_proto::v1::{Analyzer, SkippingIndexType as PbSkippingIndexType};
use snafu::ResultExt;
use crate::error::{self, Result};
@@ -121,6 +121,13 @@ pub fn as_fulltext_option(analyzer: Analyzer) -> FulltextAnalyzer {
}
}
/// Tries to construct a `SkippingIndexType` from the given skipping index type.
pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> SkippingIndexType {
match skipping_index_type {
PbSkippingIndexType::BloomFilter => SkippingIndexType::BloomFilter,
}
}
#[cfg(test)]
mod tests {

View File

@@ -15,7 +15,7 @@ api.workspace = true
arrow.workspace = true
arrow-schema.workspace = true
async-stream.workspace = true
async-trait = "0.1"
async-trait.workspace = true
bytes.workspace = true
common-catalog.workspace = true
common-error.workspace = true
@@ -31,7 +31,7 @@ common-version.workspace = true
dashmap.workspace = true
datafusion.workspace = true
datatypes.workspace = true
futures = "0.3"
futures.workspace = true
futures-util.workspace = true
humantime.workspace = true
itertools.workspace = true
@@ -39,7 +39,7 @@ lazy_static.workspace = true
meta-client.workspace = true
moka = { workspace = true, features = ["future", "sync"] }
partition.workspace = true
paste = "1.0"
paste.workspace = true
prometheus.workspace = true
rustc-hash.workspace = true
serde_json.workspace = true
@@ -49,7 +49,7 @@ sql.workspace = true
store-api.workspace = true
table.workspace = true
tokio.workspace = true
tokio-stream = "0.1"
tokio-stream.workspace = true
[dev-dependencies]
cache.workspace = true

View File

@@ -228,12 +228,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
let keys = &table_info.meta.primary_key_indices;
let schema = table.schema();
// For compatibility, use primary key columns as inverted index columns.
let pk_as_inverted_index = !schema
.column_schemas()
.iter()
.any(|c| c.has_inverted_index_key());
for (idx, column) in schema.column_schemas().iter().enumerate() {
let mut constraints = vec![];
if column.is_time_index() {
@@ -251,10 +245,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
// TODO(dimbtp): foreign key constraint not supported yet
if keys.contains(&idx) {
constraints.push(PRI_CONSTRAINT_NAME);
if pk_as_inverted_index {
constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);
}
}
if column.is_inverted_indexed() {
constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);

View File

@@ -24,10 +24,11 @@ use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::etcd::EtcdStore;
use common_meta::kv_backend::memory::MemoryKvBackend;
#[cfg(feature = "pg_kvbackend")]
use common_meta::kv_backend::postgres::PgStore;
use common_meta::kv_backend::rds::PgStore;
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute};
use common_telemetry::info;
use common_wal::options::WalOptions;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema};
use rand::Rng;
@@ -184,7 +185,7 @@ fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
region_routes
}
fn create_region_wal_options(regions: Vec<RegionNumber>) -> HashMap<RegionNumber, String> {
fn create_region_wal_options(regions: Vec<RegionNumber>) -> HashMap<RegionNumber, WalOptions> {
// TODO(niebayes): construct region wal options for benchmark.
let _ = regions;
HashMap::default()

View File

@@ -49,7 +49,12 @@ impl TableMetadataBencher {
let regions: Vec<_> = (0..64).collect();
let region_routes = create_region_routes(regions.clone());
let region_wal_options = create_region_wal_options(regions);
let region_wal_options = create_region_wal_options(regions)
.into_iter()
.map(|(region_id, wal_options)| {
(region_id, serde_json::to_string(&wal_options).unwrap())
})
.collect();
let start = Instant::now();
@@ -109,9 +114,17 @@ impl TableMetadataBencher {
let table_info = table_info.unwrap();
let table_route = table_route.unwrap();
let table_id = table_info.table_info.ident.table_id;
let regions: Vec<_> = (0..64).collect();
let region_wal_options = create_region_wal_options(regions);
let _ = self
.table_metadata_manager
.delete_table_metadata(table_id, &table_info.table_name(), &table_route)
.delete_table_metadata(
table_id,
&table_info.table_name(),
&table_route,
&region_wal_options,
)
.await;
start.elapsed()
},

View File

@@ -16,7 +16,6 @@
mod client;
pub mod client_manager;
#[cfg(feature = "testing")]
mod database;
pub mod error;
pub mod flow;
@@ -34,7 +33,6 @@ pub use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
use snafu::OptionExt;
pub use self::client::Client;
#[cfg(feature = "testing")]
pub use self::database::Database;
pub use self::error::{Error, Result};
use crate::error::{IllegalDatabaseResponseSnafu, ServerSnafu};

View File

@@ -126,10 +126,14 @@ impl SubCommand {
struct StartCommand {
#[clap(long)]
node_id: Option<u64>,
#[clap(long)]
rpc_addr: Option<String>,
#[clap(long)]
rpc_hostname: Option<String>,
/// The address to bind the gRPC server.
#[clap(long, alias = "rpc-addr")]
rpc_bind_addr: Option<String>,
/// The address advertised to the metasrv, and used for connections from outside the host.
/// If left empty or unset, the server will automatically use the IP address of the first network interface
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
#[clap(long, alias = "rpc-hostname")]
rpc_server_addr: Option<String>,
#[clap(long, value_delimiter = ',', num_args = 1..)]
metasrv_addrs: Option<Vec<String>>,
#[clap(short, long)]
@@ -181,18 +185,18 @@ impl StartCommand {
tokio_console_addr: global_options.tokio_console_addr.clone(),
};
if let Some(addr) = &self.rpc_addr {
opts.grpc.addr.clone_from(addr);
if let Some(addr) = &self.rpc_bind_addr {
opts.grpc.bind_addr.clone_from(addr);
} else if let Some(addr) = &opts.rpc_addr {
warn!("Use the deprecated attribute `DatanodeOptions.rpc_addr`, please use `grpc.addr` instead.");
opts.grpc.addr.clone_from(addr);
opts.grpc.bind_addr.clone_from(addr);
}
if let Some(hostname) = &self.rpc_hostname {
opts.grpc.hostname.clone_from(hostname);
} else if let Some(hostname) = &opts.rpc_hostname {
if let Some(server_addr) = &self.rpc_server_addr {
opts.grpc.server_addr.clone_from(server_addr);
} else if let Some(server_addr) = &opts.rpc_hostname {
warn!("Use the deprecated attribute `DatanodeOptions.rpc_hostname`, please use `grpc.hostname` instead.");
opts.grpc.hostname.clone_from(hostname);
opts.grpc.server_addr.clone_from(server_addr);
}
if let Some(runtime_size) = opts.rpc_runtime_size {
@@ -277,7 +281,7 @@ impl StartCommand {
let plugin_opts = opts.plugins;
let mut opts = opts.component;
opts.grpc.detect_hostname();
opts.grpc.detect_server_addr();
let mut plugins = Plugins::new();
plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &opts)
.await
@@ -357,8 +361,8 @@ mod tests {
rpc_addr = "127.0.0.1:4001"
rpc_hostname = "192.168.0.1"
[grpc]
addr = "127.0.0.1:3001"
hostname = "127.0.0.1"
bind_addr = "127.0.0.1:3001"
server_addr = "127.0.0.1"
runtime_size = 8
"#;
write!(file, "{}", toml_str).unwrap();
@@ -369,8 +373,8 @@ mod tests {
};
let options = cmd.load_options(&Default::default()).unwrap().component;
assert_eq!("127.0.0.1:4001".to_string(), options.grpc.addr);
assert_eq!("192.168.0.1".to_string(), options.grpc.hostname);
assert_eq!("127.0.0.1:4001".to_string(), options.grpc.bind_addr);
assert_eq!("192.168.0.1".to_string(), options.grpc.server_addr);
}
#[test]
@@ -431,7 +435,7 @@ mod tests {
let options = cmd.load_options(&Default::default()).unwrap().component;
assert_eq!("127.0.0.1:3001".to_string(), options.grpc.addr);
assert_eq!("127.0.0.1:3001".to_string(), options.grpc.bind_addr);
assert_eq!(Some(42), options.node_id);
let DatanodeWalConfig::RaftEngine(raft_engine_config) = options.wal else {
@@ -645,7 +649,7 @@ mod tests {
opts.http.addr,
DatanodeOptions::default().component.http.addr
);
assert_eq!(opts.grpc.hostname, "10.103.174.219");
assert_eq!(opts.grpc.server_addr, "10.103.174.219");
},
);
}
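
// A minimal sketch of the `detect_server_addr` behavior relied on above, under the
// assumption stated in the new doc comments: when `server_addr` is left empty, the
// server advertises the first interface's IP with the port taken from `bind_addr`.
// `first_interface_ip` is a stand-in stub; the real detection lives elsewhere.
#[allow(dead_code)]
fn detect_server_addr_sketch(bind_addr: &str, server_addr: &mut String) {
    if !server_addr.is_empty() {
        return; // an explicitly configured server_addr is kept as-is
    }
    let port = bind_addr.rsplit(':').next().unwrap_or("4001");
    *server_addr = format!("{}:{}", first_interface_ip(), port);
}

#[allow(dead_code)]
fn first_interface_ip() -> String {
    // Stub for illustration only, e.g. the "10.103.174.219" seen in the test above.
    "10.103.174.219".to_string()
}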

View File

@@ -32,7 +32,7 @@ use common_meta::key::TableMetadataManager;
use common_telemetry::info;
use common_telemetry::logging::TracingOptions;
use common_version::{short_version, version};
use flow::{FlownodeBuilder, FlownodeInstance, FrontendInvoker};
use flow::{FlownodeBuilder, FlownodeInstance, FrontendClient, FrontendInvoker};
use meta_client::{MetaClientOptions, MetaClientType};
use servers::Mode;
use snafu::{OptionExt, ResultExt};
@@ -129,11 +129,13 @@ struct StartCommand {
#[clap(long)]
node_id: Option<u64>,
/// Bind address for the gRPC server.
#[clap(long)]
rpc_addr: Option<String>,
/// Hostname for the gRPC server.
#[clap(long)]
rpc_hostname: Option<String>,
#[clap(long, alias = "rpc-addr")]
rpc_bind_addr: Option<String>,
/// The address advertised to the metasrv, and used for connections from outside the host.
/// If left empty or unset, the server will automatically use the IP address of the first network interface
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
#[clap(long, alias = "rpc-hostname")]
rpc_server_addr: Option<String>,
/// Metasrv address list.
#[clap(long, value_delimiter = ',', num_args = 1..)]
metasrv_addrs: Option<Vec<String>>,
@@ -184,12 +186,12 @@ impl StartCommand {
tokio_console_addr: global_options.tokio_console_addr.clone(),
};
if let Some(addr) = &self.rpc_addr {
opts.grpc.addr.clone_from(addr);
if let Some(addr) = &self.rpc_bind_addr {
opts.grpc.bind_addr.clone_from(addr);
}
if let Some(hostname) = &self.rpc_hostname {
opts.grpc.hostname.clone_from(hostname);
if let Some(server_addr) = &self.rpc_server_addr {
opts.grpc.server_addr.clone_from(server_addr);
}
if let Some(node_id) = self.node_id {
@@ -237,7 +239,7 @@ impl StartCommand {
info!("Flownode options: {:#?}", opts);
let mut opts = opts.component;
opts.grpc.detect_hostname();
opts.grpc.detect_server_addr();
// TODO(discord9): make it non-optional after cluster id is required
let cluster_id = opts.cluster_id.unwrap_or(0);
@@ -315,6 +317,8 @@ impl StartCommand {
Arc::new(executor),
);
let frontend_client = FrontendClient::from_meta_client(meta_client.clone());
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
let flownode_builder = FlownodeBuilder::new(
opts,
@@ -322,6 +326,7 @@ impl StartCommand {
table_metadata_manager,
catalog_manager.clone(),
flow_metadata_manager,
Arc::new(frontend_client),
)
.with_heartbeat_task(heartbeat_task);
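
// The `FrontendClient` handed to `FlownodeBuilder` is built from the meta client here,
// and from a static gRPC address in the standalone command further below. A minimal
// sketch of the shape such a client could take (the real `flow::FrontendClient` may
// hold different fields; `MetaClientRef` stands in for the meta client handle):
#[allow(dead_code)]
enum FrontendClientSketch<MetaClientRef> {
    /// Distributed mode: resolve live frontend addresses through the meta client.
    Discovered { meta_client: MetaClientRef },
    /// Standalone mode: a single, statically configured frontend gRPC address.
    Static { grpc_addr: String },
}

#[allow(dead_code)]
impl<MetaClientRef> FrontendClientSketch<MetaClientRef> {
    fn from_meta_client(meta_client: MetaClientRef) -> Self {
        Self::Discovered { meta_client }
    }
    fn from_static_grpc_addr(grpc_addr: String) -> Self {
        Self::Static { grpc_addr }
    }
}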

View File

@@ -136,13 +136,19 @@ impl SubCommand {
#[derive(Debug, Default, Parser)]
pub struct StartCommand {
/// The address to bind the gRPC server.
#[clap(long, alias = "rpc-addr")]
rpc_bind_addr: Option<String>,
/// The address advertised to the metasrv, and used for connections from outside the host.
/// If left empty or unset, the server will automatically use the IP address of the first network interface
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
#[clap(long, alias = "rpc-hostname")]
rpc_server_addr: Option<String>,
#[clap(long)]
http_addr: Option<String>,
#[clap(long)]
http_timeout: Option<u64>,
#[clap(long)]
rpc_addr: Option<String>,
#[clap(long)]
mysql_addr: Option<String>,
#[clap(long)]
postgres_addr: Option<String>,
@@ -218,11 +224,15 @@ impl StartCommand {
opts.http.disable_dashboard = disable_dashboard;
}
if let Some(addr) = &self.rpc_addr {
opts.grpc.addr.clone_from(addr);
if let Some(addr) = &self.rpc_bind_addr {
opts.grpc.bind_addr.clone_from(addr);
opts.grpc.tls = tls_opts.clone();
}
if let Some(addr) = &self.rpc_server_addr {
opts.grpc.server_addr.clone_from(addr);
}
if let Some(addr) = &self.mysql_addr {
opts.mysql.enable = true;
opts.mysql.addr.clone_from(addr);
@@ -269,7 +279,7 @@ impl StartCommand {
let plugin_opts = opts.plugins;
let mut opts = opts.component;
opts.grpc.detect_hostname();
opts.grpc.detect_server_addr();
let mut plugins = Plugins::new();
plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &opts)
.await
@@ -413,7 +423,7 @@ mod tests {
let default_opts = FrontendOptions::default().component;
assert_eq!(opts.grpc.addr, default_opts.grpc.addr);
assert_eq!(opts.grpc.bind_addr, default_opts.grpc.bind_addr);
assert!(opts.mysql.enable);
assert_eq!(opts.mysql.runtime_size, default_opts.mysql.runtime_size);
assert!(opts.postgres.enable);
@@ -604,7 +614,7 @@ mod tests {
assert_eq!(fe_opts.http.addr, "127.0.0.1:14000");
// Should be default value.
assert_eq!(fe_opts.grpc.addr, GrpcOptions::default().addr);
assert_eq!(fe_opts.grpc.bind_addr, GrpcOptions::default().bind_addr);
},
);
}
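
// Every renamed CLI flag keeps its old spelling as a clap alias, so existing scripts
// passing `--rpc-addr` / `--rpc-hostname` keep working. A minimal, self-contained
// sketch of that mechanism, mirroring the attributes used above (assumes the same
// clap derive API these commands already depend on):
#[derive(Debug, clap::Parser)]
struct AliasSketch {
    /// New name; `--rpc-addr` still resolves here through the alias.
    #[clap(long, alias = "rpc-addr")]
    rpc_bind_addr: Option<String>,
    /// New name; `--rpc-hostname` still resolves here through the alias.
    #[clap(long, alias = "rpc-hostname")]
    rpc_server_addr: Option<String>,
}

#[allow(dead_code)]
fn alias_sketch() {
    use clap::Parser as _;
    // Old and new spellings populate the same field.
    let old = AliasSketch::parse_from(["app", "--rpc-addr", "127.0.0.1:4001"]);
    let new = AliasSketch::parse_from(["app", "--rpc-bind-addr", "127.0.0.1:4001"]);
    assert_eq!(old.rpc_bind_addr, new.rpc_bind_addr);
}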

View File

@@ -42,7 +42,7 @@ pub struct Instance {
}
impl Instance {
fn new(instance: MetasrvInstance, guard: Vec<WorkerGuard>) -> Self {
pub fn new(instance: MetasrvInstance, guard: Vec<WorkerGuard>) -> Self {
Self {
instance,
_guard: guard,
@@ -133,11 +133,15 @@ impl SubCommand {
#[derive(Debug, Default, Parser)]
struct StartCommand {
#[clap(long)]
bind_addr: Option<String>,
#[clap(long)]
server_addr: Option<String>,
#[clap(long, aliases = ["store-addr"], value_delimiter = ',', num_args = 1..)]
/// The address to bind the gRPC server.
#[clap(long, alias = "bind-addr")]
rpc_bind_addr: Option<String>,
/// The communication server address for the frontend and datanode to connect to metasrv.
/// If left empty or unset, the server will automatically use the IP address of the first network interface
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
#[clap(long, alias = "server-addr")]
rpc_server_addr: Option<String>,
#[clap(long, alias = "store-addr", value_delimiter = ',', num_args = 1..)]
store_addrs: Option<Vec<String>>,
#[clap(short, long)]
config_file: Option<String>,
@@ -201,11 +205,11 @@ impl StartCommand {
tokio_console_addr: global_options.tokio_console_addr.clone(),
};
if let Some(addr) = &self.bind_addr {
if let Some(addr) = &self.rpc_bind_addr {
opts.bind_addr.clone_from(addr);
}
if let Some(addr) = &self.server_addr {
if let Some(addr) = &self.rpc_server_addr {
opts.server_addr.clone_from(addr);
}
@@ -269,11 +273,13 @@ impl StartCommand {
log_versions(version(), short_version(), APP_NAME);
info!("Metasrv start command: {:#?}", self);
info!("Metasrv options: {:#?}", opts);
let plugin_opts = opts.plugins;
let mut opts = opts.component;
opts.detect_server_addr();
info!("Metasrv options: {:#?}", opts);
let mut plugins = Plugins::new();
plugins::setup_metasrv_plugins(&mut plugins, &plugin_opts, &opts)
.await
@@ -306,8 +312,8 @@ mod tests {
#[test]
fn test_read_from_cmd() {
let cmd = StartCommand {
bind_addr: Some("127.0.0.1:3002".to_string()),
server_addr: Some("127.0.0.1:3002".to_string()),
rpc_bind_addr: Some("127.0.0.1:3002".to_string()),
rpc_server_addr: Some("127.0.0.1:3002".to_string()),
store_addrs: Some(vec!["127.0.0.1:2380".to_string()]),
selector: Some("LoadBased".to_string()),
..Default::default()
@@ -381,8 +387,8 @@ mod tests {
#[test]
fn test_load_log_options_from_cli() {
let cmd = StartCommand {
bind_addr: Some("127.0.0.1:3002".to_string()),
server_addr: Some("127.0.0.1:3002".to_string()),
rpc_bind_addr: Some("127.0.0.1:3002".to_string()),
rpc_server_addr: Some("127.0.0.1:3002".to_string()),
store_addrs: Some(vec!["127.0.0.1:2380".to_string()]),
selector: Some("LoadBased".to_string()),
..Default::default()

View File

@@ -54,13 +54,17 @@ use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, Sto
use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig;
use flow::{FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendInvoker};
use flow::{
FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendClient,
FrontendInvoker,
};
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
use frontend::server::Services;
use frontend::service_config::{
InfluxdbOptions, MysqlOptions, OpentsdbOptions, PostgresOptions, PromStoreOptions,
InfluxdbOptions, JaegerOptions, MysqlOptions, OpentsdbOptions, PostgresOptions,
PromStoreOptions,
};
use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
use mito2::config::MitoConfig;
@@ -140,6 +144,7 @@ pub struct StandaloneOptions {
pub postgres: PostgresOptions,
pub opentsdb: OpentsdbOptions,
pub influxdb: InfluxdbOptions,
pub jaeger: JaegerOptions,
pub prom_store: PromStoreOptions,
pub wal: DatanodeWalConfig,
pub storage: StorageConfig,
@@ -169,6 +174,7 @@ impl Default for StandaloneOptions {
postgres: PostgresOptions::default(),
opentsdb: OpentsdbOptions::default(),
influxdb: InfluxdbOptions::default(),
jaeger: JaegerOptions::default(),
prom_store: PromStoreOptions::default(),
wal: DatanodeWalConfig::default(),
storage: StorageConfig::default(),
@@ -217,6 +223,7 @@ impl StandaloneOptions {
postgres: cloned_opts.postgres,
opentsdb: cloned_opts.opentsdb,
influxdb: cloned_opts.influxdb,
jaeger: cloned_opts.jaeger,
prom_store: cloned_opts.prom_store,
meta_client: None,
logging: cloned_opts.logging,
@@ -329,8 +336,8 @@ impl App for Instance {
pub struct StartCommand {
#[clap(long)]
http_addr: Option<String>,
#[clap(long)]
rpc_addr: Option<String>,
#[clap(long, alias = "rpc-addr")]
rpc_bind_addr: Option<String>,
#[clap(long)]
mysql_addr: Option<String>,
#[clap(long)]
@@ -407,9 +414,9 @@ impl StartCommand {
opts.storage.data_home.clone_from(data_home);
}
if let Some(addr) = &self.rpc_addr {
if let Some(addr) = &self.rpc_bind_addr {
// The frontend gRPC addr conflicts with the datanode's default gRPC addr
let datanode_grpc_addr = DatanodeOptions::default().grpc.addr;
let datanode_grpc_addr = DatanodeOptions::default().grpc.bind_addr;
if addr.eq(&datanode_grpc_addr) {
return IllegalConfigSnafu {
msg: format!(
@@ -417,7 +424,7 @@ impl StartCommand {
),
}.fail();
}
opts.grpc.addr.clone_from(addr)
opts.grpc.bind_addr.clone_from(addr)
}
if let Some(addr) = &self.mysql_addr {
@@ -464,7 +471,7 @@ impl StartCommand {
let mut plugins = Plugins::new();
let plugin_opts = opts.plugins;
let mut opts = opts.component;
opts.grpc.detect_hostname();
opts.grpc.detect_server_addr();
let fe_opts = opts.frontend_options();
let dn_opts = opts.datanode_options();
@@ -486,8 +493,8 @@ impl StartCommand {
let metadata_dir = metadata_store_dir(data_home);
let (kv_backend, procedure_manager) = FeInstance::try_build_standalone_components(
metadata_dir,
opts.metadata_store.clone(),
opts.procedure.clone(),
opts.metadata_store,
opts.procedure,
)
.await
.context(StartFrontendSnafu)?;
@@ -529,12 +536,16 @@ impl StartCommand {
flow: opts.flow.clone(),
..Default::default()
};
let fe_server_addr = fe_opts.grpc.bind_addr.clone();
let frontend_client = FrontendClient::from_static_grpc_addr(fe_server_addr);
let flow_builder = FlownodeBuilder::new(
flownode_options,
plugins.clone(),
table_metadata_manager.clone(),
catalog_manager.clone(),
flow_metadata_manager.clone(),
Arc::new(frontend_client),
);
let flownode = Arc::new(
flow_builder
@@ -907,7 +918,7 @@ mod tests {
assert_eq!("127.0.0.1:4000".to_string(), fe_opts.http.addr);
assert_eq!(Duration::from_secs(33), fe_opts.http.timeout);
assert_eq!(ReadableSize::mb(128), fe_opts.http.body_limit);
assert_eq!("127.0.0.1:4001".to_string(), fe_opts.grpc.addr);
assert_eq!("127.0.0.1:4001".to_string(), fe_opts.grpc.bind_addr);
assert!(fe_opts.mysql.enable);
assert_eq!("127.0.0.1:4002", fe_opts.mysql.addr);
assert_eq!(2, fe_opts.mysql.runtime_size);
@@ -1037,7 +1048,7 @@ mod tests {
assert_eq!(ReadableSize::mb(64), fe_opts.http.body_limit);
// Should be default value.
assert_eq!(fe_opts.grpc.addr, GrpcOptions::default().addr);
assert_eq!(fe_opts.grpc.bind_addr, GrpcOptions::default().bind_addr);
},
);
}

View File

@@ -63,7 +63,7 @@ mod tests {
.args([
"datanode",
"start",
"--rpc-addr=0.0.0.0:4321",
"--rpc-bind-addr=0.0.0.0:4321",
"--node-id=1",
&format!("--data-home={}", data_home.path().display()),
&format!("--wal-dir={}", wal_dir.path().display()),
@@ -80,7 +80,7 @@ mod tests {
"--log-level=off",
"cli",
"attach",
"--grpc-addr=0.0.0.0:4321",
"--grpc-bind-addr=0.0.0.0:4321",
// history commands can sneaky into stdout and mess up our tests, so disable it
"--disable-helper",
]);

View File

@@ -17,9 +17,6 @@ use std::time::Duration;
use cmd::options::GreptimeOptions;
use cmd::standalone::StandaloneOptions;
use common_config::Configurable;
use common_grpc::channel_manager::{
DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
};
use common_options::datanode::{ClientOptions, DatanodeClientOptions};
use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, DEFAULT_OTLP_ENDPOINT};
use common_wal::config::raft_engine::RaftEngineConfig;
@@ -91,13 +88,8 @@ fn test_load_datanode_example_config() {
..Default::default()
},
grpc: GrpcOptions::default()
.with_addr("127.0.0.1:3001")
.with_hostname("127.0.0.1:3001"),
rpc_addr: Some("127.0.0.1:3001".to_string()),
rpc_hostname: Some("127.0.0.1".to_string()),
rpc_runtime_size: Some(8),
rpc_max_recv_message_size: Some(DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE),
rpc_max_send_message_size: Some(DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE),
.with_bind_addr("127.0.0.1:3001")
.with_server_addr("127.0.0.1:3001"),
..Default::default()
},
..Default::default()
@@ -144,7 +136,9 @@ fn test_load_frontend_example_config() {
remote_write: Some(Default::default()),
..Default::default()
},
grpc: GrpcOptions::default().with_hostname("127.0.0.1:4001"),
grpc: GrpcOptions::default()
.with_bind_addr("127.0.0.1:4001")
.with_server_addr("127.0.0.1:4001"),
http: HttpOptions {
cors_allowed_origins: vec!["https://example.com".to_string()],
..Default::default()

View File

@@ -18,7 +18,7 @@ bytes.workspace = true
common-error.workspace = true
common-macro.workspace = true
futures.workspace = true
paste = "1.0"
paste.workspace = true
pin-project.workspace = true
rand.workspace = true
serde = { version = "1.0", features = ["derive"] }

View File

@@ -12,9 +12,11 @@ common-base.workspace = true
common-error.workspace = true
common-macro.workspace = true
config.workspace = true
humantime-serde.workspace = true
num_cpus.workspace = true
serde.workspace = true
serde_json.workspace = true
serde_with.workspace = true
snafu.workspace = true
sysinfo.workspace = true
toml.workspace = true

View File

@@ -16,6 +16,8 @@ pub mod config;
pub mod error;
pub mod utils;
use std::time::Duration;
use common_base::readable_size::ReadableSize;
pub use config::*;
use serde::{Deserialize, Serialize};
@@ -34,22 +36,27 @@ pub enum Mode {
Distributed,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct KvBackendConfig {
// Kv file size in bytes
/// The size of the metadata store backend log file.
pub file_size: ReadableSize,
// Kv purge threshold in bytes
/// The threshold of the metadata store size to trigger a purge.
pub purge_threshold: ReadableSize,
/// The interval of the metadata store to trigger a purge.
#[serde(with = "humantime_serde")]
pub purge_interval: Duration,
}
impl Default for KvBackendConfig {
fn default() -> Self {
Self {
// log file size 256MB
file_size: ReadableSize::mb(256),
// purge threshold 4GB
purge_threshold: ReadableSize::gb(4),
// The log file size 64MB
file_size: ReadableSize::mb(64),
// The log purge threshold 256MB
purge_threshold: ReadableSize::mb(256),
// The log purge interval 1m
purge_interval: Duration::from_secs(60),
}
}
}
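
// A minimal sketch of how the new `purge_interval` field reads from a config file:
// with `humantime_serde`, operators can write human-friendly durations such as "1m".
// `PurgeSketch` is a hypothetical mirror of just this field, not the real struct; it
// assumes the `serde`, `humantime-serde` and `toml` dependencies added above.
#[derive(serde::Deserialize)]
#[allow(dead_code)]
struct PurgeSketch {
    #[serde(with = "humantime_serde")]
    purge_interval: std::time::Duration,
}

#[allow(dead_code)]
fn purge_interval_sketch() {
    let cfg: PurgeSketch = toml::from_str(r#"purge_interval = "1m""#).unwrap();
    // "1m" matches the 60-second default in `KvBackendConfig::default()` above.
    assert_eq!(cfg.purge_interval, std::time::Duration::from_secs(60));
}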

View File

@@ -35,7 +35,7 @@ orc-rust = { version = "0.5", default-features = false, features = [
"async",
] }
parquet.workspace = true
paste = "1.0"
paste.workspace = true
rand.workspace = true
regex = "1.7"
serde.workspace = true

View File

@@ -12,9 +12,11 @@ default = ["geo"]
geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"]
[dependencies]
ahash = "0.8"
api.workspace = true
arc-swap = "1.0"
async-trait.workspace = true
bincode = "1.3"
common-base.workspace = true
common-catalog.workspace = true
common-error.workspace = true
@@ -32,12 +34,13 @@ geo = { version = "0.29", optional = true }
geo-types = { version = "0.7", optional = true }
geohash = { version = "0.13", optional = true }
h3o = { version = "0.6", optional = true }
hyperloglogplus = "0.4"
jsonb.workspace = true
nalgebra.workspace = true
num = "0.4"
num-traits = "0.2"
once_cell.workspace = true
paste = "1.0"
paste.workspace = true
s2 = { version = "0.0.12", optional = true }
serde.workspace = true
serde_json.workspace = true
@@ -47,6 +50,7 @@ sql.workspace = true
statrs = "0.16"
store-api.workspace = true
table.workspace = true
uddsketch = { git = "https://github.com/GreptimeTeam/timescaledb-toolkit.git", rev = "84828fe8fb494a6a61412a3da96517fc80f7bb20" }
wkt = { version = "0.11", optional = true }
[dev-dependencies]

View File

@@ -0,0 +1,20 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
mod hll;
mod uddsketch_state;
pub(crate) use hll::HllStateType;
pub use hll::{HllState, HLL_MERGE_NAME, HLL_NAME};
pub use uddsketch_state::{UddSketchState, UDDSKETCH_STATE_NAME};

View File

@@ -0,0 +1,319 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_query::prelude::*;
use common_telemetry::trace;
use datafusion::arrow::array::ArrayRef;
use datafusion::common::cast::{as_binary_array, as_string_array};
use datafusion::common::not_impl_err;
use datafusion::error::{DataFusionError, Result as DfResult};
use datafusion::logical_expr::function::AccumulatorArgs;
use datafusion::logical_expr::{Accumulator as DfAccumulator, AggregateUDF};
use datafusion::prelude::create_udaf;
use datatypes::arrow::datatypes::DataType;
use hyperloglogplus::{HyperLogLog, HyperLogLogPlus};
use crate::utils::FixedRandomState;
pub const HLL_NAME: &str = "hll";
pub const HLL_MERGE_NAME: &str = "hll_merge";
const DEFAULT_PRECISION: u8 = 14;
pub(crate) type HllStateType = HyperLogLogPlus<String, FixedRandomState>;
pub struct HllState {
hll: HllStateType,
}
impl std::fmt::Debug for HllState {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "HllState<Opaque>")
}
}
impl Default for HllState {
fn default() -> Self {
Self::new()
}
}
impl HllState {
pub fn new() -> Self {
Self {
// Safety: the DEFAULT_PRECISION is fixed and valid
hll: HllStateType::new(DEFAULT_PRECISION, FixedRandomState::new()).unwrap(),
}
}
/// Create a UDF for the `hll` function.
///
/// `hll` accepts a string column and aggregates the
/// values into a HyperLogLog state.
pub fn state_udf_impl() -> AggregateUDF {
create_udaf(
HLL_NAME,
vec![DataType::Utf8],
Arc::new(DataType::Binary),
Volatility::Immutable,
Arc::new(Self::create_accumulator),
Arc::new(vec![DataType::Binary]),
)
}
/// Create a UDF for the `hll_merge` function.
///
/// `hll_merge` accepts a binary column of states generated by `hll`
/// and merges them into a single state.
pub fn merge_udf_impl() -> AggregateUDF {
create_udaf(
HLL_MERGE_NAME,
vec![DataType::Binary],
Arc::new(DataType::Binary),
Volatility::Immutable,
Arc::new(Self::create_merge_accumulator),
Arc::new(vec![DataType::Binary]),
)
}
fn update(&mut self, value: &str) {
self.hll.insert(value);
}
fn merge(&mut self, raw: &[u8]) {
if let Ok(serialized) = bincode::deserialize::<HllStateType>(raw) {
if let Ok(()) = self.hll.merge(&serialized) {
return;
}
}
trace!("Warning: Failed to merge HyperLogLog from {:?}", raw);
}
fn create_accumulator(acc_args: AccumulatorArgs) -> DfResult<Box<dyn DfAccumulator>> {
let data_type = acc_args.exprs[0].data_type(acc_args.schema)?;
match data_type {
DataType::Utf8 => Ok(Box::new(HllState::new())),
other => not_impl_err!("{HLL_NAME} does not support data type: {other}"),
}
}
fn create_merge_accumulator(acc_args: AccumulatorArgs) -> DfResult<Box<dyn DfAccumulator>> {
let data_type = acc_args.exprs[0].data_type(acc_args.schema)?;
match data_type {
DataType::Binary => Ok(Box::new(HllState::new())),
other => not_impl_err!("{HLL_MERGE_NAME} does not support data type: {other}"),
}
}
}
impl DfAccumulator for HllState {
fn update_batch(&mut self, values: &[ArrayRef]) -> DfResult<()> {
let array = &values[0];
match array.data_type() {
DataType::Utf8 => {
let string_array = as_string_array(array)?;
for value in string_array.iter().flatten() {
self.update(value);
}
}
DataType::Binary => {
let binary_array = as_binary_array(array)?;
for v in binary_array.iter().flatten() {
self.merge(v);
}
}
_ => {
return not_impl_err!(
"HLL functions do not support data type: {}",
array.data_type()
)
}
}
Ok(())
}
fn evaluate(&mut self) -> DfResult<ScalarValue> {
Ok(ScalarValue::Binary(Some(
bincode::serialize(&self.hll).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize HyperLogLog: {}", e))
})?,
)))
}
fn size(&self) -> usize {
std::mem::size_of_val(&self.hll)
}
fn state(&mut self) -> DfResult<Vec<ScalarValue>> {
Ok(vec![ScalarValue::Binary(Some(
bincode::serialize(&self.hll).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize HyperLogLog: {}", e))
})?,
))])
}
fn merge_batch(&mut self, states: &[ArrayRef]) -> DfResult<()> {
let array = &states[0];
let binary_array = as_binary_array(array)?;
for v in binary_array.iter().flatten() {
self.merge(v);
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use datafusion::arrow::array::{BinaryArray, StringArray};
use super::*;
#[test]
fn test_hll_basic() {
let mut state = HllState::new();
state.update("1");
state.update("2");
state.update("3");
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
assert_eq!(hll.count().trunc() as u32, 3);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_hll_roundtrip() {
let mut state = HllState::new();
state.update("1");
state.update("2");
// Serialize
let serialized = state.evaluate().unwrap();
// Create new state and merge the serialized data
let mut new_state = HllState::new();
if let ScalarValue::Binary(Some(bytes)) = &serialized {
new_state.merge(bytes);
// Verify the merged state matches original
let result = new_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(new_bytes)) = result {
let mut original: HllStateType = bincode::deserialize(bytes).unwrap();
let mut merged: HllStateType = bincode::deserialize(&new_bytes).unwrap();
assert_eq!(original.count(), merged.count());
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_hll_batch_update() {
let mut state = HllState::new();
// Test string values
let str_values = vec!["a", "b", "c", "d", "e", "f", "g", "h", "i"];
let str_array = Arc::new(StringArray::from(str_values)) as ArrayRef;
state.update_batch(&[str_array]).unwrap();
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
assert_eq!(hll.count().trunc() as u32, 9);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_hll_merge_batch() {
let mut state1 = HllState::new();
state1.update("1");
let state1_binary = state1.evaluate().unwrap();
let mut state2 = HllState::new();
state2.update("2");
let state2_binary = state2.evaluate().unwrap();
let mut merged_state = HllState::new();
if let (ScalarValue::Binary(Some(bytes1)), ScalarValue::Binary(Some(bytes2))) =
(&state1_binary, &state2_binary)
{
let binary_array = Arc::new(BinaryArray::from(vec![
bytes1.as_slice(),
bytes2.as_slice(),
])) as ArrayRef;
merged_state.merge_batch(&[binary_array]).unwrap();
let result = merged_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
assert_eq!(hll.count().trunc() as u32, 2);
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar values");
}
}
#[test]
fn test_hll_merge_function() {
// Create two HLL states with different values
let mut state1 = HllState::new();
state1.update("1");
state1.update("2");
let state1_binary = state1.evaluate().unwrap();
let mut state2 = HllState::new();
state2.update("2");
state2.update("3");
let state2_binary = state2.evaluate().unwrap();
// Create a merge state and merge both states
let mut merge_state = HllState::new();
if let (ScalarValue::Binary(Some(bytes1)), ScalarValue::Binary(Some(bytes2))) =
(&state1_binary, &state2_binary)
{
let binary_array = Arc::new(BinaryArray::from(vec![
bytes1.as_slice(),
bytes2.as_slice(),
])) as ArrayRef;
merge_state.update_batch(&[binary_array]).unwrap();
let result = merge_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
// Should have 3 unique values: "1", "2", "3"
assert_eq!(hll.count().trunc() as u32, 3);
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar values");
}
}
}
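
// A minimal end-to-end sketch of the new pipeline, reusing the accumulator API defined
// above: `hll` builds per-partition states, `hll_merge` combines their serialized form,
// and the scalar `hll_count` reads the estimate from the final state.
#[allow(dead_code)]
fn hll_pipeline_sketch() -> DfResult<()> {
    use datafusion::arrow::array::{BinaryArray, StringArray};

    // `hll` over two partitions of a string column.
    let mut part_a = HllState::new();
    part_a.update_batch(&[Arc::new(StringArray::from(vec!["a", "b"])) as ArrayRef])?;
    let mut part_b = HllState::new();
    part_b.update_batch(&[Arc::new(StringArray::from(vec!["b", "c"])) as ArrayRef])?;

    // `hll_merge` over the serialized partial states.
    let (ScalarValue::Binary(Some(sa)), ScalarValue::Binary(Some(sb))) =
        (part_a.evaluate()?, part_b.evaluate()?)
    else {
        unreachable!("hll states always evaluate to binary");
    };
    let mut merged = HllState::new();
    let states = Arc::new(BinaryArray::from(vec![sa.as_slice(), sb.as_slice()])) as ArrayRef;
    merged.merge_batch(&[states])?;

    // What `hll_count` does per row: deserialize the state and read the estimate.
    if let ScalarValue::Binary(Some(bytes)) = merged.evaluate()? {
        let mut hll: HllStateType = bincode::deserialize(&bytes)
            .map_err(|e| DataFusionError::Internal(e.to_string()))?;
        assert_eq!(hll.count().trunc() as u32, 3); // "a", "b", "c"
    }
    Ok(())
}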

View File

@@ -0,0 +1,307 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_query::prelude::*;
use common_telemetry::trace;
use datafusion::common::cast::{as_binary_array, as_primitive_array};
use datafusion::common::not_impl_err;
use datafusion::error::{DataFusionError, Result as DfResult};
use datafusion::logical_expr::function::AccumulatorArgs;
use datafusion::logical_expr::{Accumulator as DfAccumulator, AggregateUDF};
use datafusion::physical_plan::expressions::Literal;
use datafusion::prelude::create_udaf;
use datatypes::arrow::array::ArrayRef;
use datatypes::arrow::datatypes::{DataType, Float64Type};
use uddsketch::{SketchHashKey, UDDSketch};
pub const UDDSKETCH_STATE_NAME: &str = "uddsketch_state";
#[derive(Debug)]
pub struct UddSketchState {
uddsketch: UDDSketch,
}
impl UddSketchState {
pub fn new(bucket_size: u64, error_rate: f64) -> Self {
Self {
uddsketch: UDDSketch::new(bucket_size, error_rate),
}
}
pub fn udf_impl() -> AggregateUDF {
create_udaf(
UDDSKETCH_STATE_NAME,
vec![DataType::Int64, DataType::Float64, DataType::Float64],
Arc::new(DataType::Binary),
Volatility::Immutable,
Arc::new(|args| {
let (bucket_size, error_rate) = downcast_accumulator_args(args)?;
Ok(Box::new(UddSketchState::new(bucket_size, error_rate)))
}),
Arc::new(vec![DataType::Binary]),
)
}
fn update(&mut self, value: f64) {
self.uddsketch.add_value(value);
}
fn merge(&mut self, raw: &[u8]) {
if let Ok(uddsketch) = bincode::deserialize::<UDDSketch>(raw) {
if uddsketch.count() != 0 {
self.uddsketch.merge_sketch(&uddsketch);
}
} else {
trace!("Warning: Failed to deserialize UDDSketch from {:?}", raw);
}
}
}
fn downcast_accumulator_args(args: AccumulatorArgs) -> DfResult<(u64, f64)> {
let bucket_size = match args.exprs[0]
.as_any()
.downcast_ref::<Literal>()
.map(|lit| lit.value())
{
Some(ScalarValue::Int64(Some(value))) => *value as u64,
_ => {
return not_impl_err!(
"{} not supported for bucket size: {}",
UDDSKETCH_STATE_NAME,
&args.exprs[0]
)
}
};
let error_rate = match args.exprs[1]
.as_any()
.downcast_ref::<Literal>()
.map(|lit| lit.value())
{
Some(ScalarValue::Float64(Some(value))) => *value,
_ => {
return not_impl_err!(
"{} not supported for error rate: {}",
UDDSKETCH_STATE_NAME,
&args.exprs[1]
)
}
};
Ok((bucket_size, error_rate))
}
impl DfAccumulator for UddSketchState {
fn update_batch(&mut self, values: &[ArrayRef]) -> DfResult<()> {
let array = &values[2]; // the third column carries the data values
let f64_array = as_primitive_array::<Float64Type>(array)?;
for v in f64_array.iter().flatten() {
self.update(v);
}
Ok(())
}
fn evaluate(&mut self) -> DfResult<ScalarValue> {
Ok(ScalarValue::Binary(Some(
bincode::serialize(&self.uddsketch).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize UDDSketch: {}", e))
})?,
)))
}
fn size(&self) -> usize {
// Base size of UDDSketch struct fields
let mut total_size = std::mem::size_of::<f64>() * 3 + // alpha, gamma, values_sum
std::mem::size_of::<u32>() + // compactions
std::mem::size_of::<u64>() * 2; // max_buckets, num_values
// Size of buckets (SketchHashMap)
// Each bucket entry contains:
// - SketchHashKey (enum with i64/Zero/Invalid variants)
// - SketchHashEntry (count: u64, next: SketchHashKey)
let bucket_entry_size = std::mem::size_of::<SketchHashKey>() + // key
std::mem::size_of::<u64>() + // count
std::mem::size_of::<SketchHashKey>(); // next
total_size += self.uddsketch.current_buckets_count() * bucket_entry_size;
total_size
}
fn state(&mut self) -> DfResult<Vec<ScalarValue>> {
Ok(vec![ScalarValue::Binary(Some(
bincode::serialize(&self.uddsketch).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize UDDSketch: {}", e))
})?,
))])
}
fn merge_batch(&mut self, states: &[ArrayRef]) -> DfResult<()> {
let array = &states[0];
let binary_array = as_binary_array(array)?;
for v in binary_array.iter().flatten() {
self.merge(v);
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use datafusion::arrow::array::{BinaryArray, Float64Array};
use super::*;
#[test]
fn test_uddsketch_state_basic() {
let mut state = UddSketchState::new(10, 0.01);
state.update(1.0);
state.update(2.0);
state.update(3.0);
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let deserialized: UDDSketch = bincode::deserialize(&bytes).unwrap();
assert_eq!(deserialized.count(), 3);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_uddsketch_state_roundtrip() {
let mut state = UddSketchState::new(10, 0.01);
state.update(1.0);
state.update(2.0);
// Serialize
let serialized = state.evaluate().unwrap();
// Create new state and merge the serialized data
let mut new_state = UddSketchState::new(10, 0.01);
if let ScalarValue::Binary(Some(bytes)) = &serialized {
new_state.merge(bytes);
// Verify the merged state matches original by comparing deserialized values
let original_sketch: UDDSketch = bincode::deserialize(bytes).unwrap();
let new_result = new_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(new_bytes)) = new_result {
let new_sketch: UDDSketch = bincode::deserialize(&new_bytes).unwrap();
assert_eq!(original_sketch.count(), new_sketch.count());
assert_eq!(original_sketch.sum(), new_sketch.sum());
assert_eq!(original_sketch.mean(), new_sketch.mean());
assert_eq!(original_sketch.max_error(), new_sketch.max_error());
// Compare a few quantiles to ensure statistical equivalence
for q in [0.1, 0.5, 0.9].iter() {
assert!(
(original_sketch.estimate_quantile(*q) - new_sketch.estimate_quantile(*q))
.abs()
< 1e-10,
"Quantile {} mismatch: original={}, new={}",
q,
original_sketch.estimate_quantile(*q),
new_sketch.estimate_quantile(*q)
);
}
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_uddsketch_state_batch_update() {
let mut state = UddSketchState::new(10, 0.01);
let values = vec![1.0f64, 2.0, 3.0];
let array = Arc::new(Float64Array::from(values)) as ArrayRef;
state
.update_batch(&[array.clone(), array.clone(), array])
.unwrap();
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let deserialized: UDDSketch = bincode::deserialize(&bytes).unwrap();
assert_eq!(deserialized.count(), 3);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_uddsketch_state_merge_batch() {
let mut state1 = UddSketchState::new(10, 0.01);
state1.update(1.0);
let state1_binary = state1.evaluate().unwrap();
let mut state2 = UddSketchState::new(10, 0.01);
state2.update(2.0);
let state2_binary = state2.evaluate().unwrap();
let mut merged_state = UddSketchState::new(10, 0.01);
if let (ScalarValue::Binary(Some(bytes1)), ScalarValue::Binary(Some(bytes2))) =
(&state1_binary, &state2_binary)
{
let binary_array = Arc::new(BinaryArray::from(vec![
bytes1.as_slice(),
bytes2.as_slice(),
])) as ArrayRef;
merged_state.merge_batch(&[binary_array]).unwrap();
let result = merged_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let deserialized: UDDSketch = bincode::deserialize(&bytes).unwrap();
assert_eq!(deserialized.count(), 2);
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar values");
}
}
#[test]
fn test_uddsketch_state_size() {
let mut state = UddSketchState::new(10, 0.01);
let initial_size = state.size();
// Add some values to create buckets
state.update(1.0);
state.update(2.0);
state.update(3.0);
let size_with_values = state.size();
assert!(
size_with_values > initial_size,
"Size should increase after adding values: initial={}, with_values={}",
initial_size,
size_with_values
);
// Verify size increases with more buckets
state.update(10.0); // This should create a new bucket
assert!(
state.size() > size_with_values,
"Size should increase after adding new bucket: prev={}, new={}",
size_with_values,
state.size()
);
}
}
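
// A minimal sketch of how the pieces fit together: `uddsketch_state(bucket, error, value)`
// aggregates measurements into a serialized sketch, and `uddsketch_calc(p, state)`
// deserializes it to estimate a quantile. Reuses the accumulator defined above.
#[allow(dead_code)]
fn uddsketch_pipeline_sketch() -> DfResult<()> {
    use datatypes::arrow::array::{Float64Array, Int64Array};

    // `uddsketch_state(128, 0.01, value)`: the first two arguments are constants,
    // only the third column (the values) is consumed by `update_batch`.
    let mut state = UddSketchState::new(128, 0.01);
    let bucket = Arc::new(Int64Array::from(vec![128_i64; 3])) as ArrayRef;
    let error = Arc::new(Float64Array::from(vec![0.01_f64; 3])) as ArrayRef;
    let values = Arc::new(Float64Array::from(vec![10.0, 20.0, 30.0])) as ArrayRef;
    state.update_batch(&[bucket, error, values])?;

    // What `uddsketch_calc(0.95, state)` does per row.
    if let ScalarValue::Binary(Some(bytes)) = state.evaluate()? {
        let sketch: UDDSketch = bincode::deserialize(&bytes)
            .map_err(|e| DataFusionError::Internal(e.to_string()))?;
        let p95 = sketch.estimate_quantile(0.95);
        assert!(p95 > 0.0);
    }
    Ok(())
}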

View File

@@ -22,10 +22,12 @@ use crate::function::{AsyncFunctionRef, FunctionRef};
use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
use crate::scalars::date::DateFunction;
use crate::scalars::expression::ExpressionFunction;
use crate::scalars::hll_count::HllCalcFunction;
use crate::scalars::json::JsonFunction;
use crate::scalars::matches::MatchesFunction;
use crate::scalars::math::MathFunction;
use crate::scalars::timestamp::TimestampFunction;
use crate::scalars::uddsketch_calc::UddSketchCalcFunction;
use crate::scalars::vector::VectorFunction;
use crate::system::SystemFunction;
use crate::table::TableFunction;
@@ -105,6 +107,8 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
TimestampFunction::register(&function_registry);
DateFunction::register(&function_registry);
ExpressionFunction::register(&function_registry);
UddSketchCalcFunction::register(&function_registry);
HllCalcFunction::register(&function_registry);
// Aggregate functions
AggregateFunctions::register(&function_registry);

View File

@@ -21,6 +21,7 @@ pub mod scalars;
mod system;
mod table;
pub mod aggr;
pub mod function;
pub mod function_registry;
pub mod handlers;

View File

@@ -22,7 +22,9 @@ pub mod matches;
pub mod math;
pub mod vector;
pub(crate) mod hll_count;
#[cfg(test)]
pub(crate) mod test;
pub(crate) mod timestamp;
pub(crate) mod uddsketch_calc;
pub mod udf;

View File

@@ -0,0 +1,175 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Implementation of the scalar function `hll_count`.
use std::fmt;
use std::fmt::Display;
use std::sync::Arc;
use common_query::error::{DowncastVectorSnafu, InvalidFuncArgsSnafu, Result};
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::Vector;
use datatypes::scalars::{ScalarVector, ScalarVectorBuilder};
use datatypes::vectors::{BinaryVector, MutableVector, UInt64VectorBuilder, VectorRef};
use hyperloglogplus::HyperLogLog;
use snafu::OptionExt;
use crate::aggr::HllStateType;
use crate::function::{Function, FunctionContext};
use crate::function_registry::FunctionRegistry;
const NAME: &str = "hll_count";
/// HllCalcFunction implements the scalar function `hll_count`.
///
/// It accepts one argument:
/// 1. The serialized HyperLogLogPlus state, as produced by the aggregator (binary).
///
/// For each row, it deserializes the sketch and returns the estimated cardinality.
#[derive(Debug, Default)]
pub struct HllCalcFunction;
impl HllCalcFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(HllCalcFunction));
}
}
impl Display for HllCalcFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
impl Function for HllCalcFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint64_datatype())
}
fn signature(&self) -> Signature {
// Only argument: HyperLogLogPlus state (binary)
Signature::exact(
vec![ConcreteDataType::binary_datatype()],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
if columns.len() != 1 {
return InvalidFuncArgsSnafu {
err_msg: format!("hll_count expects 1 argument, got {}", columns.len()),
}
.fail();
}
let hll_vec = columns[0]
.as_any()
.downcast_ref::<BinaryVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!("expect BinaryVector, got {}", columns[0].vector_type_name()),
})?;
let len = hll_vec.len();
let mut builder = UInt64VectorBuilder::with_capacity(len);
for i in 0..len {
let hll_opt = hll_vec.get_data(i);
if hll_opt.is_none() {
builder.push_null();
continue;
}
let hll_bytes = hll_opt.unwrap();
// Deserialize the HyperLogLogPlus from its bincode representation
let mut hll: HllStateType = match bincode::deserialize(hll_bytes) {
Ok(h) => h,
Err(e) => {
common_telemetry::trace!("Failed to deserialize HyperLogLogPlus: {}", e);
builder.push_null();
continue;
}
};
builder.push(Some(hll.count().round() as u64));
}
Ok(builder.to_vector())
}
}
#[cfg(test)]
mod tests {
use datatypes::vectors::BinaryVector;
use super::*;
use crate::utils::FixedRandomState;
#[test]
fn test_hll_count_function() {
let function = HllCalcFunction;
assert_eq!("hll_count", function.name());
assert_eq!(
ConcreteDataType::uint64_datatype(),
function
.return_type(&[ConcreteDataType::uint64_datatype()])
.unwrap()
);
// Create a test HLL
let mut hll = HllStateType::new(14, FixedRandomState::new()).unwrap();
for i in 1..=10 {
hll.insert(&i.to_string());
}
let serialized_bytes = bincode::serialize(&hll).unwrap();
let args: Vec<VectorRef> = vec![Arc::new(BinaryVector::from(vec![Some(serialized_bytes)]))];
let result = function.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(result.len(), 1);
// Test cardinality estimate
if let datatypes::value::Value::UInt64(v) = result.get(0) {
assert_eq!(v, 10);
} else {
panic!("Expected uint64 value");
}
}
#[test]
fn test_hll_count_function_errors() {
let function = HllCalcFunction;
// Test with invalid number of arguments
let args: Vec<VectorRef> = vec![];
let result = function.eval(FunctionContext::default(), &args);
assert!(result.is_err());
assert!(result
.unwrap_err()
.to_string()
.contains("hll_count expects 1 argument"));
// Test with invalid binary data
let args: Vec<VectorRef> = vec![Arc::new(BinaryVector::from(vec![Some(vec![1, 2, 3])]))]; // Invalid binary data
let result = function.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(result.len(), 1);
assert!(matches!(result.get(0), datatypes::value::Value::Null));
}
}

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use std::sync::Arc;
mod json_get;
pub mod json_get;
mod json_is;
mod json_path_exists;
mod json_path_match;

View File

@@ -0,0 +1,211 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Implementation of the scalar function `uddsketch_calc`.
use std::fmt;
use std::fmt::Display;
use std::sync::Arc;
use common_query::error::{DowncastVectorSnafu, InvalidFuncArgsSnafu, Result};
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::Vector;
use datatypes::scalars::{ScalarVector, ScalarVectorBuilder};
use datatypes::vectors::{BinaryVector, Float64VectorBuilder, MutableVector, VectorRef};
use snafu::OptionExt;
use uddsketch::UDDSketch;
use crate::function::{Function, FunctionContext};
use crate::function_registry::FunctionRegistry;
const NAME: &str = "uddsketch_calc";
/// UddSketchCalcFunction implements the scalar function `uddsketch_calc`.
///
/// It accepts two arguments:
/// 1. A percentile (as f64) for which to compute the estimated quantile (e.g. 0.95 for p95).
/// 2. The serialized UDDSketch state, as produced by the aggregator (binary).
///
/// For each row, it deserializes the sketch and returns the computed quantile value.
#[derive(Debug, Default)]
pub struct UddSketchCalcFunction;
impl UddSketchCalcFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(UddSketchCalcFunction));
}
}
impl Display for UddSketchCalcFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
impl Function for UddSketchCalcFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::float64_datatype())
}
fn signature(&self) -> Signature {
// First argument: percentile (float64)
// Second argument: UDDSketch state (binary)
Signature::exact(
vec![
ConcreteDataType::float64_datatype(),
ConcreteDataType::binary_datatype(),
],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
if columns.len() != 2 {
return InvalidFuncArgsSnafu {
err_msg: format!("uddsketch_calc expects 2 arguments, got {}", columns.len()),
}
.fail();
}
let perc_vec = &columns[0];
let sketch_vec = columns[1]
.as_any()
.downcast_ref::<BinaryVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!("expect BinaryVector, got {}", columns[1].vector_type_name()),
})?;
let len = sketch_vec.len();
let mut builder = Float64VectorBuilder::with_capacity(len);
for i in 0..len {
let perc_opt = perc_vec.get(i).as_f64_lossy();
let sketch_opt = sketch_vec.get_data(i);
if sketch_opt.is_none() || perc_opt.is_none() {
builder.push_null();
continue;
}
let sketch_bytes = sketch_opt.unwrap();
let perc = perc_opt.unwrap();
// Deserialize the UDDSketch from its bincode representation
let sketch: UDDSketch = match bincode::deserialize(sketch_bytes) {
Ok(s) => s,
Err(e) => {
common_telemetry::trace!("Failed to deserialize UDDSketch: {}", e);
builder.push_null();
continue;
}
};
// Compute the estimated quantile from the sketch
let result = sketch.estimate_quantile(perc);
builder.push(Some(result));
}
Ok(builder.to_vector())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use datatypes::vectors::{BinaryVector, Float64Vector};
use super::*;
#[test]
fn test_uddsketch_calc_function() {
let function = UddSketchCalcFunction;
assert_eq!("uddsketch_calc", function.name());
assert_eq!(
ConcreteDataType::float64_datatype(),
function
.return_type(&[ConcreteDataType::float64_datatype()])
.unwrap()
);
// Create a test sketch
let mut sketch = UDDSketch::new(128, 0.01);
sketch.add_value(10.0);
sketch.add_value(20.0);
sketch.add_value(30.0);
sketch.add_value(40.0);
sketch.add_value(50.0);
sketch.add_value(60.0);
sketch.add_value(70.0);
sketch.add_value(80.0);
sketch.add_value(90.0);
sketch.add_value(100.0);
// Get expected values directly from the sketch
let expected_p50 = sketch.estimate_quantile(0.5);
let expected_p90 = sketch.estimate_quantile(0.9);
let expected_p95 = sketch.estimate_quantile(0.95);
let serialized = bincode::serialize(&sketch).unwrap();
let percentiles = vec![0.5, 0.9, 0.95];
let args: Vec<VectorRef> = vec![
Arc::new(Float64Vector::from_vec(percentiles.clone())),
Arc::new(BinaryVector::from(vec![Some(serialized.clone()); 3])),
];
let result = function.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(result.len(), 3);
// Test median (p50)
assert!(
matches!(result.get(0), datatypes::value::Value::Float64(v) if (v - expected_p50).abs() < 1e-10)
);
// Test p90
assert!(
matches!(result.get(1), datatypes::value::Value::Float64(v) if (v - expected_p90).abs() < 1e-10)
);
// Test p95
assert!(
matches!(result.get(2), datatypes::value::Value::Float64(v) if (v - expected_p95).abs() < 1e-10)
);
}
#[test]
fn test_uddsketch_calc_function_errors() {
let function = UddSketchCalcFunction;
// Test with invalid number of arguments
let args: Vec<VectorRef> = vec![Arc::new(Float64Vector::from_vec(vec![0.95]))];
let result = function.eval(FunctionContext::default(), &args);
assert!(result.is_err());
assert!(result
.unwrap_err()
.to_string()
.contains("uddsketch_calc expects 2 arguments"));
// Test with invalid binary data
let args: Vec<VectorRef> = vec![
Arc::new(Float64Vector::from_vec(vec![0.95])),
Arc::new(BinaryVector::from(vec![Some(vec![1, 2, 3])])), // Invalid binary data
];
let result = function.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(result.len(), 1);
assert!(matches!(result.get(0), datatypes::value::Value::Null));
}
}

View File

@@ -20,11 +20,12 @@ pub mod impl_conv;
pub(crate) mod product;
mod scalar_add;
mod scalar_mul;
mod sub;
pub(crate) mod sum;
mod vector_add;
mod vector_div;
mod vector_mul;
mod vector_norm;
mod vector_sub;
use std::sync::Arc;
@@ -48,10 +49,11 @@ impl VectorFunction {
registry.register(Arc::new(scalar_mul::ScalarMulFunction));
// vector calculation
registry.register(Arc::new(vector_add::VectorAddFunction));
registry.register(Arc::new(vector_sub::VectorSubFunction));
registry.register(Arc::new(vector_mul::VectorMulFunction));
registry.register(Arc::new(vector_norm::VectorNormFunction));
registry.register(Arc::new(vector_div::VectorDivFunction));
registry.register(Arc::new(sub::SubFunction));
registry.register(Arc::new(vector_norm::VectorNormFunction));
registry.register(Arc::new(elem_sum::ElemSumFunction));
registry.register(Arc::new(elem_product::ElemProductFunction));
}

View File

@@ -0,0 +1,214 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::borrow::Cow;
use std::fmt::Display;
use common_query::error::InvalidFuncArgsSnafu;
use common_query::prelude::Signature;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
use nalgebra::DVectorView;
use snafu::ensure;
use crate::function::{Function, FunctionContext};
use crate::helper;
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit};
const NAME: &str = "vec_add";
/// Adds corresponding elements of two vectors, returns a vector.
///
/// # Example
///
/// ```sql
/// SELECT vec_to_string(vec_add("[1.0, 1.0]", "[1.0, 2.0]")) as result;
///
/// +---------------------------------------------------------------+
/// | vec_to_string(vec_add(Utf8("[1.0, 1.0]"),Utf8("[1.0, 2.0]"))) |
/// +---------------------------------------------------------------+
/// | [2,3] |
/// +---------------------------------------------------------------+
///
#[derive(Debug, Clone, Default)]
pub struct VectorAddFunction;
impl Function for VectorAddFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(
&self,
_input_types: &[ConcreteDataType],
) -> common_query::error::Result<ConcreteDataType> {
Ok(ConcreteDataType::binary_datatype())
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::binary_datatype(),
],
vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::binary_datatype(),
],
)
}
fn eval(
&self,
_func_ctx: FunctionContext,
columns: &[VectorRef],
) -> common_query::error::Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly two, have: {}",
columns.len()
)
}
);
let arg0 = &columns[0];
let arg1 = &columns[1];
ensure!(
arg0.len() == arg1.len(),
InvalidFuncArgsSnafu {
err_msg: format!(
"The lengths of the vector are not aligned, args 0: {}, args 1: {}",
arg0.len(),
arg1.len(),
)
}
);
let len = arg0.len();
let mut result = BinaryVectorBuilder::with_capacity(len);
if len == 0 {
return Ok(result.to_vector());
}
let arg0_const = as_veclit_if_const(arg0)?;
let arg1_const = as_veclit_if_const(arg1)?;
for i in 0..len {
let arg0 = match arg0_const.as_ref() {
Some(arg0) => Some(Cow::Borrowed(arg0.as_ref())),
None => as_veclit(arg0.get_ref(i))?,
};
let arg1 = match arg1_const.as_ref() {
Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())),
None => as_veclit(arg1.get_ref(i))?,
};
let (Some(arg0), Some(arg1)) = (arg0, arg1) else {
result.push_null();
continue;
};
let vec0 = DVectorView::from_slice(&arg0, arg0.len());
let vec1 = DVectorView::from_slice(&arg1, arg1.len());
let vec_res = vec0 + vec1;
let veclit = vec_res.as_slice();
let binlit = veclit_to_binlit(veclit);
result.push(Some(&binlit));
}
Ok(result.to_vector())
}
}
impl Display for VectorAddFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::error::Error;
use datatypes::vectors::StringVector;
use super::*;
#[test]
fn test_sub() {
let func = VectorAddFunction;
let input0 = Arc::new(StringVector::from(vec![
Some("[1.0,2.0,3.0]".to_string()),
Some("[4.0,5.0,6.0]".to_string()),
None,
Some("[2.0,3.0,3.0]".to_string()),
]));
let input1 = Arc::new(StringVector::from(vec![
Some("[1.0,1.0,1.0]".to_string()),
Some("[6.0,5.0,4.0]".to_string()),
Some("[3.0,2.0,2.0]".to_string()),
None,
]));
let result = func
.eval(FunctionContext::default(), &[input0, input1])
.unwrap();
let result = result.as_ref();
assert_eq!(result.len(), 4);
assert_eq!(
result.get_ref(0).as_binary().unwrap(),
Some(veclit_to_binlit(&[2.0, 3.0, 4.0]).as_slice())
);
assert_eq!(
result.get_ref(1).as_binary().unwrap(),
Some(veclit_to_binlit(&[10.0, 10.0, 10.0]).as_slice())
);
assert!(result.get_ref(2).is_null());
assert!(result.get_ref(3).is_null());
}
#[test]
fn test_sub_error() {
let func = VectorAddFunction;
let input0 = Arc::new(StringVector::from(vec![
Some("[1.0,2.0,3.0]".to_string()),
Some("[4.0,5.0,6.0]".to_string()),
None,
Some("[2.0,3.0,3.0]".to_string()),
]));
let input1 = Arc::new(StringVector::from(vec![
Some("[1.0,1.0,1.0]".to_string()),
Some("[6.0,5.0,4.0]".to_string()),
Some("[3.0,2.0,2.0]".to_string()),
]));
let result = func.eval(FunctionContext::default(), &[input0, input1]);
match result {
Err(Error::InvalidFuncArgs { err_msg, .. }) => {
assert_eq!(
err_msg,
"The lengths of the vector are not aligned, args 0: 4, args 1: 3"
)
}
_ => unreachable!(),
}
}
}

View File

@@ -42,19 +42,10 @@ const NAME: &str = "vec_sub";
/// | [0,-1] |
/// +---------------------------------------------------------------+
///
/// -- Negative scalar to simulate subtraction
/// SELECT vec_to_string(vec_sub('[-1.0, -1.0]', '[1.0, 2.0]'));
///
/// +-----------------------------------------------------------------+
/// | vec_to_string(vec_sub(Utf8("[-1.0, -1.0]"),Utf8("[1.0, 2.0]"))) |
/// +-----------------------------------------------------------------+
/// | [-2,-3] |
/// +-----------------------------------------------------------------+
///
#[derive(Debug, Clone, Default)]
pub struct SubFunction;
pub struct VectorSubFunction;
impl Function for SubFunction {
impl Function for VectorSubFunction {
fn name(&self) -> &str {
NAME
}
@@ -142,7 +133,7 @@ impl Function for SubFunction {
}
}
impl Display for SubFunction {
impl Display for VectorSubFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
@@ -159,7 +150,7 @@ mod tests {
#[test]
fn test_sub() {
let func = SubFunction;
let func = VectorSubFunction;
let input0 = Arc::new(StringVector::from(vec![
Some("[1.0,2.0,3.0]".to_string()),
@@ -194,7 +185,7 @@ mod tests {
#[test]
fn test_sub_error() {
let func = SubFunction;
let func = VectorSubFunction;
let input0 = Arc::new(StringVector::from(vec![
Some("[1.0,2.0,3.0]".to_string()),

View File

@@ -12,6 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::hash::BuildHasher;
use ahash::RandomState;
use serde::{Deserialize, Serialize};
/// Escapes special characters in the provided pattern string for `LIKE`.
///
/// Specifically, it prefixes the backslash (`\`), percent (`%`), and underscore (`_`)
@@ -32,6 +37,71 @@ pub fn escape_like_pattern(pattern: &str) -> String {
})
.collect::<String>()
}
/// A random state with fixed seeds.
///
/// This is used to ensure that hash values are consistent across different
/// processes and that the state is easy to serialize and deserialize.
#[derive(Debug)]
pub struct FixedRandomState {
state: RandomState,
}
impl FixedRandomState {
// some random seeds
const RANDOM_SEED_0: u64 = 0x517cc1b727220a95;
const RANDOM_SEED_1: u64 = 0x428a2f98d728ae22;
const RANDOM_SEED_2: u64 = 0x7137449123ef65cd;
const RANDOM_SEED_3: u64 = 0xb5c0fbcfec4d3b2f;
pub fn new() -> Self {
Self {
state: ahash::RandomState::with_seeds(
Self::RANDOM_SEED_0,
Self::RANDOM_SEED_1,
Self::RANDOM_SEED_2,
Self::RANDOM_SEED_3,
),
}
}
}
impl Default for FixedRandomState {
fn default() -> Self {
Self::new()
}
}
impl BuildHasher for FixedRandomState {
type Hasher = ahash::AHasher;
fn build_hasher(&self) -> Self::Hasher {
self.state.build_hasher()
}
fn hash_one<T: std::hash::Hash>(&self, x: T) -> u64 {
self.state.hash_one(x)
}
}
impl Serialize for FixedRandomState {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_unit()
}
}
impl<'de> Deserialize<'de> for FixedRandomState {
fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
Ok(Self::new())
}
}
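
// A small sketch of the property this type provides: two independently created
// states hash identically, which is what lets sketches built in different
// processes be merged and re-counted consistently.
#[allow(dead_code)]
fn fixed_random_state_is_deterministic() {
    let a = FixedRandomState::new();
    let b = FixedRandomState::new();
    assert_eq!(a.hash_one("greptime"), b.hash_one("greptime"));
}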
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -22,4 +22,4 @@ store-api.workspace = true
table.workspace = true
[dev-dependencies]
paste = "1.0"
paste.workspace = true

View File

@@ -15,13 +15,14 @@
use api::helper::ColumnDataTypeWrapper;
use api::v1::add_column_location::LocationType;
use api::v1::alter_table_expr::Kind;
use api::v1::column_def::as_fulltext_option;
use api::v1::column_def::{as_fulltext_option, as_skipping_index_type};
use api::v1::{
column_def, AddColumnLocation as Location, AlterTableExpr, Analyzer, CreateTableExpr,
DropColumns, ModifyColumnTypes, RenameTable, SemanticType,
SkippingIndexType as PbSkippingIndexType,
};
use common_query::AddColumnLocation;
use datatypes::schema::{ColumnSchema, FulltextOptions, RawSchema};
use datatypes::schema::{ColumnSchema, FulltextOptions, RawSchema, SkippingIndexOptions};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::region_request::{SetRegionOption, UnsetRegionOption};
use table::metadata::TableId;
@@ -31,7 +32,8 @@ use table::requests::{
};
use crate::error::{
InvalidColumnDefSnafu, InvalidSetFulltextOptionRequestSnafu, InvalidSetTableOptionRequestSnafu,
InvalidColumnDefSnafu, InvalidSetFulltextOptionRequestSnafu,
InvalidSetSkippingIndexOptionRequestSnafu, InvalidSetTableOptionRequestSnafu,
InvalidUnsetTableOptionRequestSnafu, MissingAlterIndexOptionSnafu, MissingFieldSnafu,
MissingTimestampColumnSnafu, Result, UnknownLocationTypeSnafu,
};
@@ -137,6 +139,18 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<
column_name: i.column_name,
},
},
api::v1::set_index::Options::Skipping(s) => AlterKind::SetIndex {
options: SetIndexOptions::Skipping {
column_name: s.column_name,
options: SkippingIndexOptions {
granularity: s.granularity as u32,
index_type: as_skipping_index_type(
PbSkippingIndexType::try_from(s.skipping_index_type)
.context(InvalidSetSkippingIndexOptionRequestSnafu)?,
),
},
},
},
},
None => return MissingAlterIndexOptionSnafu.fail(),
},
@@ -152,6 +166,11 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<
column_name: i.column_name,
},
},
api::v1::unset_index::Options::Skipping(s) => AlterKind::UnsetIndex {
options: UnsetIndexOptions::Skipping {
column_name: s.column_name,
},
},
},
None => return MissingAlterIndexOptionSnafu.fail(),
},


@@ -140,6 +140,14 @@ pub enum Error {
error: prost::UnknownEnumValue,
},
#[snafu(display("Invalid set skipping index option request"))]
InvalidSetSkippingIndexOptionRequest {
#[snafu(implicit)]
location: Location,
#[snafu(source)]
error: prost::UnknownEnumValue,
},
#[snafu(display("Missing alter index options"))]
MissingAlterIndexOption {
#[snafu(implicit)]
@@ -171,6 +179,7 @@ impl ErrorExt for Error {
Error::InvalidSetTableOptionRequest { .. }
| Error::InvalidUnsetTableOptionRequest { .. }
| Error::InvalidSetFulltextOptionRequest { .. }
| Error::InvalidSetSkippingIndexOptionRequest { .. }
| Error::MissingAlterIndexOption { .. } => StatusCode::InvalidArguments,
}
}


@@ -14,37 +14,12 @@
use api::helper;
use api::v1::column::Values;
use api::v1::{Column, CreateTableExpr};
use common_base::BitVec;
use datatypes::data_type::{ConcreteDataType, DataType};
use datatypes::prelude::VectorRef;
use snafu::{ensure, ResultExt};
use table::metadata::TableId;
use table::table_reference::TableReference;
use crate::error::{CreateVectorSnafu, Result, UnexpectedValuesLengthSnafu};
use crate::util;
use crate::util::ColumnExpr;
/// Try to build create table request from insert data.
pub fn build_create_expr_from_insertion(
catalog_name: &str,
schema_name: &str,
table_id: Option<TableId>,
table_name: &str,
columns: &[Column],
engine: &str,
) -> Result<CreateTableExpr> {
let table_name = TableReference::full(catalog_name, schema_name, table_name);
let column_exprs = ColumnExpr::from_columns(columns);
util::build_create_table_expr(
table_id,
&table_name,
column_exprs,
engine,
"Created on insertion",
)
}
pub(crate) fn add_values_to_builder(
data_type: ConcreteDataType,
@@ -87,276 +62,7 @@ fn is_null(null_mask: &BitVec, idx: usize) -> Option<bool> {
#[cfg(test)]
mod tests {
use std::sync::Arc;
use std::{assert_eq, vec};
use api::helper::ColumnDataTypeWrapper;
use api::v1::column::Values;
use api::v1::column_data_type_extension::TypeExt;
use api::v1::{
Column, ColumnDataType, ColumnDataTypeExtension, Decimal128, DecimalTypeExtension,
IntervalMonthDayNano, SemanticType,
};
use common_base::BitVec;
use common_catalog::consts::MITO_ENGINE;
use common_time::interval::IntervalUnit;
use common_time::timestamp::TimeUnit;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SchemaBuilder};
use snafu::ResultExt;
use super::*;
use crate::error;
use crate::error::ColumnDataTypeSnafu;
#[inline]
fn build_column_schema(
column_name: &str,
datatype: i32,
nullable: bool,
) -> error::Result<ColumnSchema> {
let datatype_wrapper =
ColumnDataTypeWrapper::try_new(datatype, None).context(ColumnDataTypeSnafu)?;
Ok(ColumnSchema::new(
column_name,
datatype_wrapper.into(),
nullable,
))
}
#[test]
fn test_build_create_table_request() {
let table_id = Some(10);
let table_name = "test_metric";
assert!(
build_create_expr_from_insertion("", "", table_id, table_name, &[], MITO_ENGINE)
.is_err()
);
let insert_batch = mock_insert_batch();
let create_expr = build_create_expr_from_insertion(
"",
"",
table_id,
table_name,
&insert_batch.0,
MITO_ENGINE,
)
.unwrap();
assert_eq!(table_id, create_expr.table_id.map(|x| x.id));
assert_eq!(table_name, create_expr.table_name);
assert_eq!("Created on insertion".to_string(), create_expr.desc);
assert_eq!(
vec![create_expr.column_defs[0].name.clone()],
create_expr.primary_keys
);
let column_defs = create_expr.column_defs;
assert_eq!(column_defs[5].name, create_expr.time_index);
assert_eq!(7, column_defs.len());
assert_eq!(
ConcreteDataType::string_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "host")
.unwrap()
.data_type,
None
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::float64_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "cpu")
.unwrap()
.data_type,
None
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::float64_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "memory")
.unwrap()
.data_type,
None
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::time_datatype(TimeUnit::Millisecond),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "time")
.unwrap()
.data_type,
None
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "interval")
.unwrap()
.data_type,
None
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "ts")
.unwrap()
.data_type,
None
)
.unwrap()
)
);
let decimal_column = column_defs.iter().find(|c| c.name == "decimals").unwrap();
assert_eq!(
ConcreteDataType::decimal128_datatype(38, 10),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
decimal_column.data_type,
decimal_column.datatype_extension,
)
.unwrap()
)
);
}
#[test]
fn test_find_new_columns() {
let mut columns = Vec::with_capacity(1);
let cpu_column = build_column_schema("cpu", 10, true).unwrap();
let ts_column = build_column_schema("ts", 15, false)
.unwrap()
.with_time_index(true);
columns.push(cpu_column);
columns.push(ts_column);
let schema = Arc::new(SchemaBuilder::try_from(columns).unwrap().build().unwrap());
assert!(
util::extract_new_columns(&schema, ColumnExpr::from_columns(&[]))
.unwrap()
.is_none()
);
let insert_batch = mock_insert_batch();
let add_columns =
util::extract_new_columns(&schema, ColumnExpr::from_columns(&insert_batch.0))
.unwrap()
.unwrap();
assert_eq!(5, add_columns.add_columns.len());
let host_column = &add_columns.add_columns[0];
assert_eq!(
ConcreteDataType::string_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
host_column.column_def.as_ref().unwrap().data_type,
None
)
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
let memory_column = &add_columns.add_columns[1];
assert_eq!(
ConcreteDataType::float64_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
memory_column.column_def.as_ref().unwrap().data_type,
None
)
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
let time_column = &add_columns.add_columns[2];
assert_eq!(
ConcreteDataType::time_datatype(TimeUnit::Millisecond),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
time_column.column_def.as_ref().unwrap().data_type,
None
)
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
let interval_column = &add_columns.add_columns[3];
assert_eq!(
ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
interval_column.column_def.as_ref().unwrap().data_type,
None
)
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
let decimal_column = &add_columns.add_columns[4];
assert_eq!(
ConcreteDataType::decimal128_datatype(38, 10),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
decimal_column.column_def.as_ref().unwrap().data_type,
decimal_column
.column_def
.as_ref()
.unwrap()
.datatype_extension
)
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
}
#[test]
fn test_is_null() {
@@ -371,127 +77,4 @@ mod tests {
assert_eq!(None, is_null(&null_mask, 16));
assert_eq!(None, is_null(&null_mask, 99));
}
fn mock_insert_batch() -> (Vec<Column>, u32) {
let row_count = 2;
let host_vals = Values {
string_values: vec!["host1".to_string(), "host2".to_string()],
..Default::default()
};
let host_column = Column {
column_name: "host".to_string(),
semantic_type: SemanticType::Tag as i32,
values: Some(host_vals),
null_mask: vec![0],
datatype: ColumnDataType::String as i32,
..Default::default()
};
let cpu_vals = Values {
f64_values: vec![0.31],
..Default::default()
};
let cpu_column = Column {
column_name: "cpu".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(cpu_vals),
null_mask: vec![2],
datatype: ColumnDataType::Float64 as i32,
..Default::default()
};
let mem_vals = Values {
f64_values: vec![0.1],
..Default::default()
};
let mem_column = Column {
column_name: "memory".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(mem_vals),
null_mask: vec![1],
datatype: ColumnDataType::Float64 as i32,
..Default::default()
};
let time_vals = Values {
time_millisecond_values: vec![100, 101],
..Default::default()
};
let time_column = Column {
column_name: "time".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(time_vals),
null_mask: vec![0],
datatype: ColumnDataType::TimeMillisecond as i32,
..Default::default()
};
let interval1 = IntervalMonthDayNano {
months: 1,
days: 2,
nanoseconds: 3,
};
let interval2 = IntervalMonthDayNano {
months: 4,
days: 5,
nanoseconds: 6,
};
let interval_vals = Values {
interval_month_day_nano_values: vec![interval1, interval2],
..Default::default()
};
let interval_column = Column {
column_name: "interval".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(interval_vals),
null_mask: vec![0],
datatype: ColumnDataType::IntervalMonthDayNano as i32,
..Default::default()
};
let ts_vals = Values {
timestamp_millisecond_values: vec![100, 101],
..Default::default()
};
let ts_column = Column {
column_name: "ts".to_string(),
semantic_type: SemanticType::Timestamp as i32,
values: Some(ts_vals),
null_mask: vec![0],
datatype: ColumnDataType::TimestampMillisecond as i32,
..Default::default()
};
let decimal_vals = Values {
decimal128_values: vec![Decimal128 { hi: 0, lo: 123 }, Decimal128 { hi: 0, lo: 456 }],
..Default::default()
};
let decimal_column = Column {
column_name: "decimals".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(decimal_vals),
null_mask: vec![0],
datatype: ColumnDataType::Decimal128 as i32,
datatype_extension: Some(ColumnDataTypeExtension {
type_ext: Some(TypeExt::DecimalType(DecimalTypeExtension {
precision: 38,
scale: 10,
})),
}),
options: None,
};
(
vec![
host_column,
cpu_column,
mem_column,
time_column,
interval_column,
ts_column,
decimal_column,
],
row_count,
)
}
}


@@ -19,4 +19,3 @@ pub mod insert;
pub mod util;
pub use alter::{alter_expr_to_request, create_table_schema};
pub use insert::build_create_expr_from_insertion;


@@ -236,3 +236,414 @@ pub fn extract_new_columns(
}))
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use std::{assert_eq, vec};
use api::helper::ColumnDataTypeWrapper;
use api::v1::column::Values;
use api::v1::column_data_type_extension::TypeExt;
use api::v1::{
Column, ColumnDataType, ColumnDataTypeExtension, Decimal128, DecimalTypeExtension,
IntervalMonthDayNano, SemanticType,
};
use common_catalog::consts::MITO_ENGINE;
use common_time::interval::IntervalUnit;
use common_time::timestamp::TimeUnit;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SchemaBuilder};
use snafu::ResultExt;
use super::*;
use crate::error;
use crate::error::ColumnDataTypeSnafu;
#[inline]
fn build_column_schema(
column_name: &str,
datatype: i32,
nullable: bool,
) -> error::Result<ColumnSchema> {
let datatype_wrapper =
ColumnDataTypeWrapper::try_new(datatype, None).context(ColumnDataTypeSnafu)?;
Ok(ColumnSchema::new(
column_name,
datatype_wrapper.into(),
nullable,
))
}
fn build_create_expr_from_insertion(
catalog_name: &str,
schema_name: &str,
table_id: Option<TableId>,
table_name: &str,
columns: &[Column],
engine: &str,
) -> Result<CreateTableExpr> {
let table_name = TableReference::full(catalog_name, schema_name, table_name);
let column_exprs = ColumnExpr::from_columns(columns);
build_create_table_expr(
table_id,
&table_name,
column_exprs,
engine,
"Created on insertion",
)
}
#[test]
fn test_build_create_table_request() {
let table_id = Some(10);
let table_name = "test_metric";
assert!(
build_create_expr_from_insertion("", "", table_id, table_name, &[], MITO_ENGINE)
.is_err()
);
let insert_batch = mock_insert_batch();
let create_expr = build_create_expr_from_insertion(
"",
"",
table_id,
table_name,
&insert_batch.0,
MITO_ENGINE,
)
.unwrap();
assert_eq!(table_id, create_expr.table_id.map(|x| x.id));
assert_eq!(table_name, create_expr.table_name);
assert_eq!("Created on insertion".to_string(), create_expr.desc);
assert_eq!(
vec![create_expr.column_defs[0].name.clone()],
create_expr.primary_keys
);
let column_defs = create_expr.column_defs;
assert_eq!(column_defs[5].name, create_expr.time_index);
assert_eq!(7, column_defs.len());
assert_eq!(
ConcreteDataType::string_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "host")
.unwrap()
.data_type,
None
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::float64_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "cpu")
.unwrap()
.data_type,
None
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::float64_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "memory")
.unwrap()
.data_type,
None
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::time_datatype(TimeUnit::Millisecond),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "time")
.unwrap()
.data_type,
None
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "interval")
.unwrap()
.data_type,
None
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "ts")
.unwrap()
.data_type,
None
)
.unwrap()
)
);
let decimal_column = column_defs.iter().find(|c| c.name == "decimals").unwrap();
assert_eq!(
ConcreteDataType::decimal128_datatype(38, 10),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
decimal_column.data_type,
decimal_column.datatype_extension,
)
.unwrap()
)
);
}
#[test]
fn test_find_new_columns() {
let mut columns = Vec::with_capacity(1);
let cpu_column = build_column_schema("cpu", 10, true).unwrap();
let ts_column = build_column_schema("ts", 15, false)
.unwrap()
.with_time_index(true);
columns.push(cpu_column);
columns.push(ts_column);
let schema = Arc::new(SchemaBuilder::try_from(columns).unwrap().build().unwrap());
assert!(extract_new_columns(&schema, ColumnExpr::from_columns(&[]))
.unwrap()
.is_none());
let insert_batch = mock_insert_batch();
let add_columns = extract_new_columns(&schema, ColumnExpr::from_columns(&insert_batch.0))
.unwrap()
.unwrap();
assert_eq!(5, add_columns.add_columns.len());
let host_column = &add_columns.add_columns[0];
assert_eq!(
ConcreteDataType::string_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
host_column.column_def.as_ref().unwrap().data_type,
None
)
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
let memory_column = &add_columns.add_columns[1];
assert_eq!(
ConcreteDataType::float64_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
memory_column.column_def.as_ref().unwrap().data_type,
None
)
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
let time_column = &add_columns.add_columns[2];
assert_eq!(
ConcreteDataType::time_datatype(TimeUnit::Millisecond),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
time_column.column_def.as_ref().unwrap().data_type,
None
)
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
let interval_column = &add_columns.add_columns[3];
assert_eq!(
ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
interval_column.column_def.as_ref().unwrap().data_type,
None
)
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
let decimal_column = &add_columns.add_columns[4];
assert_eq!(
ConcreteDataType::decimal128_datatype(38, 10),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
decimal_column.column_def.as_ref().unwrap().data_type,
decimal_column
.column_def
.as_ref()
.unwrap()
.datatype_extension
)
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
}
fn mock_insert_batch() -> (Vec<Column>, u32) {
let row_count = 2;
let host_vals = Values {
string_values: vec!["host1".to_string(), "host2".to_string()],
..Default::default()
};
let host_column = Column {
column_name: "host".to_string(),
semantic_type: SemanticType::Tag as i32,
values: Some(host_vals),
null_mask: vec![0],
datatype: ColumnDataType::String as i32,
..Default::default()
};
let cpu_vals = Values {
f64_values: vec![0.31],
..Default::default()
};
let cpu_column = Column {
column_name: "cpu".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(cpu_vals),
null_mask: vec![2],
datatype: ColumnDataType::Float64 as i32,
..Default::default()
};
let mem_vals = Values {
f64_values: vec![0.1],
..Default::default()
};
let mem_column = Column {
column_name: "memory".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(mem_vals),
null_mask: vec![1],
datatype: ColumnDataType::Float64 as i32,
..Default::default()
};
let time_vals = Values {
time_millisecond_values: vec![100, 101],
..Default::default()
};
let time_column = Column {
column_name: "time".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(time_vals),
null_mask: vec![0],
datatype: ColumnDataType::TimeMillisecond as i32,
..Default::default()
};
let interval1 = IntervalMonthDayNano {
months: 1,
days: 2,
nanoseconds: 3,
};
let interval2 = IntervalMonthDayNano {
months: 4,
days: 5,
nanoseconds: 6,
};
let interval_vals = Values {
interval_month_day_nano_values: vec![interval1, interval2],
..Default::default()
};
let interval_column = Column {
column_name: "interval".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(interval_vals),
null_mask: vec![0],
datatype: ColumnDataType::IntervalMonthDayNano as i32,
..Default::default()
};
let ts_vals = Values {
timestamp_millisecond_values: vec![100, 101],
..Default::default()
};
let ts_column = Column {
column_name: "ts".to_string(),
semantic_type: SemanticType::Timestamp as i32,
values: Some(ts_vals),
null_mask: vec![0],
datatype: ColumnDataType::TimestampMillisecond as i32,
..Default::default()
};
let decimal_vals = Values {
decimal128_values: vec![Decimal128 { hi: 0, lo: 123 }, Decimal128 { hi: 0, lo: 456 }],
..Default::default()
};
let decimal_column = Column {
column_name: "decimals".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(decimal_vals),
null_mask: vec![0],
datatype: ColumnDataType::Decimal128 as i32,
datatype_extension: Some(ColumnDataTypeExtension {
type_ext: Some(TypeExt::DecimalType(DecimalTypeExtension {
precision: 38,
scale: 10,
})),
}),
options: None,
};
(
vec![
host_column,
cpu_column,
mem_column,
time_column,
interval_column,
ts_column,
decimal_column,
],
row_count,
)
}
}


@@ -445,10 +445,16 @@ impl Pool {
async fn recycle_channel_in_loop(pool: Arc<Pool>, interval_secs: u64) {
let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
// use a weak ref here to prevent the pool from being leaked
let pool_weak = Arc::downgrade(&pool);
loop {
let _ = interval.tick().await;
pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
if let Some(pool) = pool_weak.upgrade() {
pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
} else {
// no one is using this pool any more, so the recycle task can stop as well
break;
}
}
}
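A self-contained sketch of the weak-reference pattern applied above, using a hypothetical `Pool` type (assumed names, not the crate's API): the background task keeps only a `Weak`, so dropping the last strong `Arc` frees the pool and ends the loop instead of leaking it.
use std::sync::Arc;
use std::time::Duration;

struct Pool;
impl Pool {
    fn recycle(&self) { /* drop idle channels here */ }
}

async fn recycle_in_loop(pool: Arc<Pool>, interval_secs: u64) {
    // Keep only a weak handle inside the long-lived task.
    let pool_weak = Arc::downgrade(&pool);
    drop(pool);
    let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
    loop {
        interval.tick().await;
        match pool_weak.upgrade() {
            Some(pool) => pool.recycle(),
            // Every strong reference is gone: stop instead of keeping the pool alive.
            None => break,
        }
    }
}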


@@ -6,7 +6,7 @@ license.workspace = true
[features]
testing = []
pg_kvbackend = ["dep:tokio-postgres", "dep:backon"]
pg_kvbackend = ["dep:tokio-postgres", "dep:backon", "dep:deadpool-postgres", "dep:deadpool"]
[lints]
workspace = true
@@ -36,8 +36,8 @@ common-wal.workspace = true
datafusion-common.workspace = true
datafusion-expr.workspace = true
datatypes.workspace = true
deadpool.workspace = true
deadpool-postgres.workspace = true
deadpool = { workspace = true, optional = true }
deadpool-postgres = { workspace = true, optional = true }
derive_builder.workspace = true
etcd-client.workspace = true
futures.workspace = true


@@ -16,7 +16,6 @@ use std::collections::HashMap;
use std::sync::Arc;
use futures::future::BoxFuture;
use futures::TryStreamExt;
use moka::future::Cache;
use moka::ops::compute::Op;
use table::metadata::TableId;
@@ -54,9 +53,13 @@ fn init_factory(table_flow_manager: TableFlowManagerRef) -> Initializer<TableId,
Box::pin(async move {
table_flow_manager
.flows(table_id)
.map_ok(|(key, value)| (key.flownode_id(), value.peer))
.try_collect::<HashMap<_, _>>()
.await
.map(|flows| {
flows
.into_iter()
.map(|(key, value)| (key.flownode_id(), value.peer))
.collect::<HashMap<_, _>>()
})
// We must cache the value even if it's empty,
// to avoid requesting the remote storage again next time;
// If the value is added to the remote storage,


@@ -12,8 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::hash::{DefaultHasher, Hash, Hasher};
use std::str::FromStr;
use api::v1::meta::HeartbeatRequest;
use common_error::ext::ErrorExt;
use lazy_static::lazy_static;
use regex::Regex;
@@ -55,12 +57,10 @@ pub trait ClusterInfo {
}
/// The key of [NodeInfo] in the storage. The format is `__meta_cluster_node_info-{cluster_id}-{role}-{node_id}`.
///
/// This key cannot be used to describe the `Metasrv` because the `Metasrv` does not have
/// a `cluster_id`; it serves multiple clusters.
#[derive(Debug, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize)]
pub struct NodeInfoKey {
/// The cluster id.
// todo(hl): remove cluster_id as it is not assigned anywhere.
pub cluster_id: ClusterId,
/// The role of the node. It can be `[Role::Datanode]` or `[Role::Frontend]`.
pub role: Role,
@@ -69,6 +69,28 @@ pub struct NodeInfoKey {
}
impl NodeInfoKey {
/// Try to create a `NodeInfoKey` from a "good" heartbeat request. "good" as in every needed
/// piece of information is provided and valid.
pub fn new(request: &HeartbeatRequest) -> Option<Self> {
let HeartbeatRequest { header, peer, .. } = request;
let header = header.as_ref()?;
let peer = peer.as_ref()?;
let role = header.role.try_into().ok()?;
let node_id = match role {
// Because the Frontend is stateless, it's too easy to neglect choosing a unique id
// for it when setting up a cluster. So we calculate its id from its address.
Role::Frontend => calculate_node_id(&peer.addr),
_ => peer.id,
};
Some(NodeInfoKey {
cluster_id: header.cluster_id,
role,
node_id,
})
}
pub fn key_prefix_with_cluster_id(cluster_id: u64) -> String {
format!("{}-{}-", CLUSTER_NODE_INFO_PREFIX, cluster_id)
}
@@ -83,6 +105,13 @@ impl NodeInfoKey {
}
}
/// Calculate (by using the DefaultHasher) the node's id from its address.
fn calculate_node_id(addr: &str) -> u64 {
let mut hasher = DefaultHasher::new();
addr.hash(&mut hasher);
hasher.finish()
}
/// The information of a node in the cluster.
#[derive(Debug, Serialize, Deserialize)]
pub struct NodeInfo {
@@ -100,7 +129,7 @@ pub struct NodeInfo {
pub start_time_ms: u64,
}
#[derive(Debug, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize)]
pub enum Role {
Datanode,
Frontend,
@@ -201,8 +230,8 @@ impl TryFrom<Vec<u8>> for NodeInfoKey {
}
}
impl From<NodeInfoKey> for Vec<u8> {
fn from(key: NodeInfoKey) -> Self {
impl From<&NodeInfoKey> for Vec<u8> {
fn from(key: &NodeInfoKey) -> Self {
format!(
"{}-{}-{}-{}",
CLUSTER_NODE_INFO_PREFIX,
@@ -271,6 +300,7 @@ impl TryFrom<i32> for Role {
mod tests {
use std::assert_matches::assert_matches;
use super::*;
use crate::cluster::Role::{Datanode, Frontend};
use crate::cluster::{DatanodeStatus, NodeInfo, NodeInfoKey, NodeStatus};
use crate::peer::Peer;
@@ -283,7 +313,7 @@ mod tests {
node_id: 2,
};
let key_bytes: Vec<u8> = key.into();
let key_bytes: Vec<u8> = (&key).into();
let new_key: NodeInfoKey = key_bytes.try_into().unwrap();
assert_eq!(1, new_key.cluster_id);
@@ -338,4 +368,26 @@ mod tests {
let prefix = NodeInfoKey::key_prefix_with_role(2, Frontend);
assert_eq!(prefix, "__meta_cluster_node_info-2-1-");
}
#[test]
fn test_calculate_node_id_from_addr() {
// Test empty string
assert_eq!(calculate_node_id(""), calculate_node_id(""));
// Test same addresses return same ids
let addr1 = "127.0.0.1:8080";
let id1 = calculate_node_id(addr1);
let id2 = calculate_node_id(addr1);
assert_eq!(id1, id2);
// Test different addresses return different ids
let addr2 = "127.0.0.1:8081";
let id3 = calculate_node_id(addr2);
assert_ne!(id1, id3);
// Test long address
let long_addr = "very.long.domain.name.example.com:9999";
let id4 = calculate_node_id(long_addr);
assert!(id4 > 0);
}
}


@@ -15,6 +15,7 @@
mod metadata;
use std::collections::BTreeMap;
use std::fmt;
use api::v1::flow::flow_request::Body as PbFlowRequest;
use api::v1::flow::{CreateRequest, FlowRequest, FlowRequestHeader};
@@ -28,7 +29,6 @@ use common_procedure::{
use common_telemetry::info;
use common_telemetry::tracing_context::TracingContext;
use futures::future::join_all;
use futures::TryStreamExt;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use snafu::{ensure, ResultExt};
@@ -77,6 +77,7 @@ impl CreateFlowProcedure {
query_context,
state: CreateFlowState::Prepare,
prev_flow_info_value: None,
flow_type: None,
},
}
}
@@ -104,7 +105,7 @@ impl CreateFlowProcedure {
if create_if_not_exists && or_replace {
// this is forbidden because it's unclear what that combination would mean
return error::UnsupportedSnafu {
operation: "Create flow with both `IF NOT EXISTS` and `OR REPLACE`".to_string(),
operation: "Create flow with both `IF NOT EXISTS` and `OR REPLACE`",
}
.fail();
}
@@ -129,9 +130,10 @@ impl CreateFlowProcedure {
.flow_metadata_manager
.flow_route_manager()
.routes(flow_id)
.map_ok(|(_, value)| value.peer)
.try_collect::<Vec<_>>()
.await?;
.await?
.into_iter()
.map(|(_, value)| value.peer)
.collect::<Vec<_>>();
self.data.flow_id = Some(flow_id);
self.data.peers = peers;
info!("Replacing flow, flow_id: {}", flow_id);
@@ -175,6 +177,8 @@ impl CreateFlowProcedure {
self.allocate_flow_id().await?;
}
self.data.state = CreateFlowState::CreateFlows;
// determine flow type
self.data.flow_type = Some(determine_flow_type(&self.data.task));
Ok(Status::executing(true))
}
@@ -309,6 +313,11 @@ impl Procedure for CreateFlowProcedure {
}
}
pub fn determine_flow_type(_flow_task: &CreateFlowTask) -> FlowType {
// TODO(discord9): determine flow type
FlowType::RecordingRule
}
/// The state of [CreateFlowProcedure].
#[derive(Debug, Clone, Serialize, Deserialize, AsRefStr, PartialEq)]
pub enum CreateFlowState {
@@ -322,6 +331,36 @@ pub enum CreateFlowState {
CreateMetadata,
}
/// The type of flow.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum FlowType {
/// The flow is a recording rule task.
RecordingRule,
/// The flow is a streaming task.
Streaming,
}
impl FlowType {
pub const RECORDING_RULE: &str = "recording_rule";
pub const STREAMING: &str = "streaming";
pub const FLOW_TYPE_KEY: &str = "flow_type";
}
impl Default for FlowType {
fn default() -> Self {
Self::RecordingRule
}
}
impl fmt::Display for FlowType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
FlowType::RecordingRule => write!(f, "{}", FlowType::RECORDING_RULE),
FlowType::Streaming => write!(f, "{}", FlowType::STREAMING),
}
}
}
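A small illustration (not from the diff) of how the type round-trips through the string constants, the same way the `From<&CreateFlowData>` impls below store it in `flow_options`:
#[test]
fn flow_type_sketch() {
    use std::collections::HashMap;

    // The default flow type renders as the `recording_rule` constant.
    assert_eq!(FlowType::default().to_string(), FlowType::RECORDING_RULE);

    // It is recorded in the options map under FLOW_TYPE_KEY.
    let mut options = HashMap::new();
    options.insert(
        FlowType::FLOW_TYPE_KEY.to_string(),
        FlowType::Streaming.to_string(),
    );
    assert_eq!(
        options.get(FlowType::FLOW_TYPE_KEY).map(String::as_str),
        Some(FlowType::STREAMING),
    );
}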
/// The serializable data.
#[derive(Debug, Serialize, Deserialize)]
pub struct CreateFlowData {
@@ -335,6 +374,7 @@ pub struct CreateFlowData {
/// For verify if prev value is consistent when need to update flow metadata.
/// only set when `or_replace` is true.
pub(crate) prev_flow_info_value: Option<DeserializedValueWithBytes<FlowInfoValue>>,
pub(crate) flow_type: Option<FlowType>,
}
impl From<&CreateFlowData> for CreateRequest {
@@ -342,7 +382,7 @@ impl From<&CreateFlowData> for CreateRequest {
let flow_id = value.flow_id.unwrap();
let source_table_ids = &value.source_table_ids;
CreateRequest {
let mut req = CreateRequest {
flow_id: Some(api::v1::FlowId { id: flow_id }),
source_table_ids: source_table_ids
.iter()
@@ -356,7 +396,12 @@ impl From<&CreateFlowData> for CreateRequest {
comment: value.task.comment.clone(),
sql: value.task.sql.clone(),
flow_options: value.task.flow_options.clone(),
}
};
let flow_type = value.flow_type.unwrap_or_default().to_string();
req.flow_options
.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
req
}
}
@@ -369,7 +414,7 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
expire_after,
comment,
sql,
flow_options: options,
flow_options: mut options,
..
} = value.task.clone();
@@ -386,19 +431,21 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
.map(|(idx, peer)| (idx as u32, FlowRouteValue { peer: peer.clone() }))
.collect::<Vec<_>>();
(
FlowInfoValue {
source_table_ids: value.source_table_ids.clone(),
sink_table_name,
flownode_ids,
catalog_name,
flow_name,
raw_sql: sql,
expire_after,
comment,
options,
},
flow_routes,
)
let flow_type = value.flow_type.unwrap_or_default().to_string();
options.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
let flow_info = FlowInfoValue {
source_table_ids: value.source_table_ids.clone(),
sink_table_name,
flownode_ids,
catalog_name,
flow_name,
raw_sql: sql,
expire_after,
comment,
options,
};
(flow_info, flow_routes)
}
}


@@ -13,6 +13,7 @@
// limitations under the License.
use std::any::Any;
use std::collections::HashMap;
use common_procedure::Status;
use common_telemetry::info;
@@ -25,6 +26,7 @@ use super::cursor::DropDatabaseCursor;
use super::{DropDatabaseContext, DropTableTarget};
use crate::ddl::drop_database::State;
use crate::ddl::drop_table::executor::DropTableExecutor;
use crate::ddl::utils::extract_region_wal_options;
use crate::ddl::DdlContext;
use crate::error::{self, Result};
use crate::key::table_route::TableRouteValue;
@@ -107,12 +109,26 @@ impl State for DropDatabaseExecutor {
self.physical_table_id,
self.physical_region_routes.clone(),
);
// Deletes topic-region mapping if dropping physical table
let region_wal_options =
if let TableRouteValue::Physical(table_route_value) = &table_route_value {
let datanode_table_values = ddl_ctx
.table_metadata_manager
.datanode_table_manager()
.regions(self.physical_table_id, table_route_value)
.await?;
extract_region_wal_options(&datanode_table_values)?
} else {
HashMap::new()
};
executor
.on_destroy_metadata(ddl_ctx, &table_route_value)
.on_destroy_metadata(ddl_ctx, &table_route_value, &region_wal_options)
.await?;
executor.invalidate_table_cache(ddl_ctx).await?;
executor
.on_drop_regions(ddl_ctx, &self.physical_region_routes)
.on_drop_regions(ddl_ctx, &self.physical_region_routes, true)
.await?;
info!("Table: {}({}) is dropped", self.table_name, self.table_id);


@@ -13,7 +13,6 @@
// limitations under the License.
use common_catalog::format_full_flow_name;
use futures::TryStreamExt;
use snafu::{ensure, OptionExt};
use crate::ddl::drop_flow::DropFlowProcedure;
@@ -39,9 +38,10 @@ impl DropFlowProcedure {
.flow_metadata_manager
.flow_route_manager()
.routes(self.data.task.flow_id)
.map_ok(|(_, value)| value)
.try_collect::<Vec<_>>()
.await?;
.await?
.into_iter()
.map(|(_, value)| value)
.collect::<Vec<_>>();
ensure!(
!flow_route_values.is_empty(),
error::FlowRouteNotFoundSnafu {


@@ -15,6 +15,8 @@
pub(crate) mod executor;
mod metadata;
use std::collections::HashMap;
use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_procedure::error::{ExternalSnafu, FromJsonSnafu, ToJsonSnafu};
@@ -24,8 +26,10 @@ use common_procedure::{
};
use common_telemetry::info;
use common_telemetry::tracing::warn;
use common_wal::options::WalOptions;
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionNumber;
use strum::AsRefStr;
use table::metadata::TableId;
use table::table_reference::TableReference;
@@ -131,7 +135,11 @@ impl DropTableProcedure {
);
// Deletes table metadata logically.
self.executor
.on_delete_metadata(&self.context, table_route_value)
.on_delete_metadata(
&self.context,
table_route_value,
&self.data.region_wal_options,
)
.await?;
info!("Deleted table metadata for table {table_id}");
self.data.state = DropTableState::InvalidateTableCache;
@@ -148,7 +156,7 @@ impl DropTableProcedure {
pub async fn on_datanode_drop_regions(&mut self) -> Result<Status> {
self.executor
.on_drop_regions(&self.context, &self.data.physical_region_routes)
.on_drop_regions(&self.context, &self.data.physical_region_routes, false)
.await?;
self.data.state = DropTableState::DeleteTombstone;
Ok(Status::executing(true))
@@ -163,7 +171,11 @@ impl DropTableProcedure {
self.data.physical_region_routes.clone(),
);
self.executor
.on_delete_metadata_tombstone(&self.context, table_route_value)
.on_delete_metadata_tombstone(
&self.context,
table_route_value,
&self.data.region_wal_options,
)
.await?;
self.dropping_regions.clear();
@@ -243,7 +255,11 @@ impl Procedure for DropTableProcedure {
self.data.physical_region_routes.clone(),
);
self.executor
.on_restore_metadata(&self.context, table_route_value)
.on_restore_metadata(
&self.context,
table_route_value,
&self.data.region_wal_options,
)
.await
.map_err(ProcedureError::external)
}
@@ -257,6 +273,8 @@ pub struct DropTableData {
pub physical_region_routes: Vec<RegionRoute>,
pub physical_table_id: Option<TableId>,
#[serde(default)]
pub region_wal_options: HashMap<RegionNumber, WalOptions>,
#[serde(default)]
pub allow_rollback: bool,
}
@@ -268,6 +286,7 @@ impl DropTableData {
task,
physical_region_routes: vec![],
physical_table_id: None,
region_wal_options: HashMap::new(),
allow_rollback: false,
}
}


@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use api::v1::region::{
region_request, DropRequest as PbDropRegionRequest, RegionRequest, RegionRequestHeader,
};
@@ -19,9 +21,10 @@ use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_telemetry::debug;
use common_telemetry::tracing_context::TracingContext;
use common_wal::options::WalOptions;
use futures::future::join_all;
use snafu::ensure;
use store_api::storage::RegionId;
use store_api::storage::{RegionId, RegionNumber};
use table::metadata::TableId;
use table::table_name::TableName;
@@ -113,9 +116,15 @@ impl DropTableExecutor {
&self,
ctx: &DdlContext,
table_route_value: &TableRouteValue,
region_wal_options: &HashMap<RegionNumber, WalOptions>,
) -> Result<()> {
ctx.table_metadata_manager
.delete_table_metadata(self.table_id, &self.table, table_route_value)
.delete_table_metadata(
self.table_id,
&self.table,
table_route_value,
region_wal_options,
)
.await
}
@@ -124,9 +133,15 @@ impl DropTableExecutor {
&self,
ctx: &DdlContext,
table_route_value: &TableRouteValue,
region_wal_options: &HashMap<u32, WalOptions>,
) -> Result<()> {
ctx.table_metadata_manager
.delete_table_metadata_tombstone(self.table_id, &self.table, table_route_value)
.delete_table_metadata_tombstone(
self.table_id,
&self.table,
table_route_value,
region_wal_options,
)
.await
}
@@ -135,9 +150,15 @@ impl DropTableExecutor {
&self,
ctx: &DdlContext,
table_route_value: &TableRouteValue,
region_wal_options: &HashMap<u32, WalOptions>,
) -> Result<()> {
ctx.table_metadata_manager
.destroy_table_metadata(self.table_id, &self.table, table_route_value)
.destroy_table_metadata(
self.table_id,
&self.table,
table_route_value,
region_wal_options,
)
.await?;
let detecting_regions = if table_route_value.is_physical() {
@@ -156,9 +177,15 @@ impl DropTableExecutor {
&self,
ctx: &DdlContext,
table_route_value: &TableRouteValue,
region_wal_options: &HashMap<u32, WalOptions>,
) -> Result<()> {
ctx.table_metadata_manager
.restore_table_metadata(self.table_id, &self.table, table_route_value)
.restore_table_metadata(
self.table_id,
&self.table,
table_route_value,
region_wal_options,
)
.await
}
@@ -187,6 +214,7 @@ impl DropTableExecutor {
&self,
ctx: &DdlContext,
region_routes: &[RegionRoute],
fast_path: bool,
) -> Result<()> {
let leaders = find_leaders(region_routes);
let mut drop_region_tasks = Vec::with_capacity(leaders.len());
@@ -209,6 +237,7 @@ impl DropTableExecutor {
}),
body: Some(region_request::Body::Drop(PbDropRegionRequest {
region_id: region_id.as_u64(),
fast_path,
})),
};
let datanode = datanode.clone();


@@ -17,6 +17,7 @@ use snafu::OptionExt;
use store_api::metric_engine_consts::METRIC_ENGINE_NAME;
use crate::ddl::drop_table::DropTableProcedure;
use crate::ddl::utils::extract_region_wal_options;
use crate::error::{self, Result};
impl DropTableProcedure {
@@ -30,9 +31,6 @@ impl DropTableProcedure {
.get_physical_table_route(task.table_id)
.await?;
self.data.physical_region_routes = physical_table_route_value.region_routes;
self.data.physical_table_id = Some(physical_table_id);
if physical_table_id == self.data.table_id() {
let table_info_value = self
.context
@@ -47,9 +45,21 @@ impl DropTableProcedure {
let engine = table_info_value.table_info.meta.engine;
// rollback only if dropping the metric physical table fails
self.data.allow_rollback = engine.as_str() == METRIC_ENGINE_NAME
self.data.allow_rollback = engine.as_str() == METRIC_ENGINE_NAME;
// Deletes topic-region mapping if dropping physical table
let datanode_table_values = self
.context
.table_metadata_manager
.datanode_table_manager()
.regions(physical_table_id, &physical_table_route_value)
.await?;
self.data.region_wal_options = extract_region_wal_options(&datanode_table_values)?;
}
self.data.physical_region_routes = physical_table_route_value.region_routes;
self.data.physical_table_id = Some(physical_table_id);
Ok(())
}
}


@@ -219,7 +219,7 @@ async fn test_replace_view_metadata() {
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
}
// Set `or_replce` to be `true` and try again
// Set `or_replace` to be `true` and try again
task.create_view.or_replace = true;
task.create_view.logical_plan = vec![4, 5, 6];
task.create_view.definition = "new_definition".to_string();


@@ -12,16 +12,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use common_catalog::consts::METRIC_ENGINE;
use common_error::ext::BoxedError;
use common_procedure::error::Error as ProcedureError;
use common_wal::options::WalOptions;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
use store_api::storage::RegionNumber;
use table::metadata::TableId;
use table::table_reference::TableReference;
use crate::ddl::DetectingRegion;
use crate::error::{Error, OperateDatanodeSnafu, Result, TableNotFoundSnafu, UnsupportedSnafu};
use crate::error::{
Error, OperateDatanodeSnafu, ParseWalOptionsSnafu, Result, TableNotFoundSnafu, UnsupportedSnafu,
};
use crate::key::datanode_table::DatanodeTableValue;
use crate::key::table_name::TableNameKey;
use crate::key::TableMetadataManagerRef;
use crate::peer::Peer;
@@ -151,6 +158,32 @@ pub fn convert_region_routes_to_detecting_regions(
.collect::<Vec<_>>()
}
/// Parses [WalOptions] from the serialized strings in the hashmap.
pub fn parse_region_wal_options(
serialized_options: &HashMap<RegionNumber, String>,
) -> Result<HashMap<RegionNumber, WalOptions>> {
let mut region_wal_options = HashMap::with_capacity(serialized_options.len());
for (region_number, wal_options) in serialized_options {
let wal_option = serde_json::from_str::<WalOptions>(wal_options)
.context(ParseWalOptionsSnafu { wal_options })?;
region_wal_options.insert(*region_number, wal_option);
}
Ok(region_wal_options)
}
/// Extracts region wal options from [DatanodeTableValue]s.
pub fn extract_region_wal_options(
datanode_table_values: &Vec<DatanodeTableValue>,
) -> Result<HashMap<RegionNumber, WalOptions>> {
let mut region_wal_options = HashMap::new();
for value in datanode_table_values {
let serialized_options = &value.region_info.region_wal_options;
let parsed_options = parse_region_wal_options(serialized_options)?;
region_wal_options.extend(parsed_options);
}
Ok(region_wal_options)
}
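A hedged usage sketch (illustrative only): serialize one Kafka WAL option the way the metadata layer stores it, then parse it back into the typed map.
#[test]
fn parse_region_wal_options_sketch() {
    use common_wal::options::KafkaWalOptions;

    // One region whose WAL option is a Kafka topic, stored as a JSON string.
    let serialized: HashMap<RegionNumber, String> = HashMap::from([(
        0,
        serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
            topic: "greptimedb_topic0".to_string(),
        }))
        .unwrap(),
    )]);
    let parsed = parse_region_wal_options(&serialized).unwrap();
    assert!(matches!(parsed.get(&0), Some(WalOptions::Kafka(_))));
}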
#[cfg(test)]
mod tests {
use super::*;


@@ -686,8 +686,8 @@ pub enum Error {
},
#[cfg(feature = "pg_kvbackend")]
#[snafu(display("Postgres transaction retry failed"))]
PostgresTransactionRetryFailed {
#[snafu(display("Rds transaction retry failed"))]
RdsTransactionRetryFailed {
#[snafu(implicit)]
location: Location,
},
@@ -710,6 +710,15 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to parse wal options: {}", wal_options))]
ParseWalOptions {
wal_options: String,
#[snafu(implicit)]
location: Location,
#[snafu(source)]
error: serde_json::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -762,7 +771,8 @@ impl ErrorExt for Error {
| UnexpectedLogicalRouteTable { .. }
| ProcedureOutput { .. }
| FromUtf8 { .. }
| MetadataCorruption { .. } => StatusCode::Unexpected,
| MetadataCorruption { .. }
| ParseWalOptions { .. } => StatusCode::Unexpected,
SendMessage { .. } | GetKvCache { .. } | CacheNotGet { .. } => StatusCode::Internal,
@@ -814,7 +824,7 @@ impl ErrorExt for Error {
| CreatePostgresPool { .. }
| GetPostgresConnection { .. }
| PostgresTransaction { .. }
| PostgresTransactionRetryFailed { .. } => StatusCode::Internal,
| RdsTransactionRetryFailed { .. } => StatusCode::Internal,
Error::DatanodeTableInfoNotFound { .. } => StatusCode::Internal,
}
}


@@ -57,7 +57,7 @@
//! - This key is mainly used in constructing the view in Datanode and Frontend.
//!
//! 12. Kafka topic key: `__topic_name/kafka/{topic_name}`
// - The key is used to mark existing topics in kafka for WAL.
//! - The key is used to mark existing topics in kafka for WAL.
//!
//! 13. Topic name to region map key `__topic_region/{topic_name}/{region_id}`
//! - Mapping {topic_name} to {region_id}
@@ -122,6 +122,7 @@ use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_PRIVATE_SCHEMA_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME,
};
use common_telemetry::warn;
use common_wal::options::WalOptions;
use datanode_table::{DatanodeTableKey, DatanodeTableManager, DatanodeTableValue};
use flow::flow_route::FlowRouteValue;
use flow::table_flow::TableFlowValue;
@@ -136,6 +137,7 @@ use table::metadata::{RawTableInfo, TableId};
use table::table_name::TableName;
use table_info::{TableInfoKey, TableInfoManager, TableInfoValue};
use table_name::{TableNameKey, TableNameManager, TableNameValue};
use topic_region::{TopicRegionKey, TopicRegionManager};
use view_info::{ViewInfoKey, ViewInfoManager, ViewInfoValue};
use self::catalog_name::{CatalogManager, CatalogNameKey, CatalogNameValue};
@@ -306,6 +308,7 @@ pub struct TableMetadataManager {
schema_manager: SchemaManager,
table_route_manager: TableRouteManager,
tombstone_manager: TombstoneManager,
topic_region_manager: TopicRegionManager,
kv_backend: KvBackendRef,
}
@@ -456,6 +459,7 @@ impl TableMetadataManager {
schema_manager: SchemaManager::new(kv_backend.clone()),
table_route_manager: TableRouteManager::new(kv_backend.clone()),
tombstone_manager: TombstoneManager::new(kv_backend.clone()),
topic_region_manager: TopicRegionManager::new(kv_backend.clone()),
kv_backend,
}
}
@@ -648,10 +652,15 @@ impl TableMetadataManager {
.table_route_storage()
.build_create_txn(table_id, &table_route_value)?;
let create_topic_region_txn = self
.topic_region_manager
.build_create_txn(table_id, &region_wal_options)?;
let mut txn = Txn::merge_all(vec![
create_table_name_txn,
create_table_info_txn,
create_table_route_txn,
create_topic_region_txn,
]);
if let TableRouteValue::Physical(x) = &table_route_value {
@@ -785,6 +794,7 @@ impl TableMetadataManager {
table_id: TableId,
table_name: &TableName,
table_route_value: &TableRouteValue,
region_wal_options: &HashMap<RegionNumber, WalOptions>,
) -> Result<Vec<Vec<u8>>> {
// Builds keys
let datanode_ids = if table_route_value.is_physical() {
@@ -806,13 +816,22 @@ impl TableMetadataManager {
.into_iter()
.map(|datanode_id| DatanodeTableKey::new(datanode_id, table_id))
.collect::<HashSet<_>>();
let topic_region_map = self
.topic_region_manager
.get_topic_region_mapping(table_id, region_wal_options);
let topic_region_keys = topic_region_map
.iter()
.map(|(region_id, topic)| TopicRegionKey::new(*region_id, topic))
.collect::<Vec<_>>();
keys.push(table_name.to_bytes());
keys.push(table_info_key.to_bytes());
keys.push(table_route_key.to_bytes());
for key in &datanode_table_keys {
keys.push(key.to_bytes());
}
for key in topic_region_keys {
keys.push(key.to_bytes());
}
Ok(keys)
}
@@ -823,8 +842,10 @@ impl TableMetadataManager {
table_id: TableId,
table_name: &TableName,
table_route_value: &TableRouteValue,
region_wal_options: &HashMap<RegionNumber, WalOptions>,
) -> Result<()> {
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
let keys =
self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
self.tombstone_manager.create(keys).await
}
@@ -835,9 +856,11 @@ impl TableMetadataManager {
table_id: TableId,
table_name: &TableName,
table_route_value: &TableRouteValue,
region_wal_options: &HashMap<RegionNumber, WalOptions>,
) -> Result<()> {
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
self.tombstone_manager.delete(keys).await
let table_metadata_keys =
self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
self.tombstone_manager.delete(table_metadata_keys).await
}
/// Restores metadata for table.
@@ -847,8 +870,10 @@ impl TableMetadataManager {
table_id: TableId,
table_name: &TableName,
table_route_value: &TableRouteValue,
region_wal_options: &HashMap<RegionNumber, WalOptions>,
) -> Result<()> {
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
let keys =
self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
self.tombstone_manager.restore(keys).await
}
@@ -859,8 +884,10 @@ impl TableMetadataManager {
table_id: TableId,
table_name: &TableName,
table_route_value: &TableRouteValue,
region_wal_options: &HashMap<RegionNumber, WalOptions>,
) -> Result<()> {
let keys = self.table_metadata_keys(table_id, table_name, table_route_value)?;
let keys =
self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
let _ = self
.kv_backend
.batch_delete(BatchDeleteRequest::new().with_keys(keys))
@@ -1309,8 +1336,9 @@ mod tests {
use bytes::Bytes;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_time::util::current_time_millis;
use common_wal::options::{KafkaWalOptions, WalOptions};
use futures::TryStreamExt;
use store_api::storage::RegionId;
use store_api::storage::{RegionId, RegionNumber};
use table::metadata::{RawTableInfo, TableInfo};
use table::table_name::TableName;
@@ -1323,10 +1351,15 @@ mod tests {
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::key::{DeserializedValueWithBytes, TableMetadataManager, ViewInfoValue};
use crate::key::{
DeserializedValueWithBytes, TableMetadataManager, ViewInfoValue, TOPIC_REGION_PREFIX,
};
use crate::kv_backend::memory::MemoryKvBackend;
use crate::kv_backend::KvBackend;
use crate::peer::Peer;
use crate::rpc::router::{region_distribution, LeaderState, Region, RegionRoute};
use crate::rpc::store::RangeRequest;
use crate::wal_options_allocator::{allocate_region_wal_options, WalOptionsAllocator};
#[test]
fn test_deserialized_value_with_bytes() {
@@ -1398,16 +1431,63 @@ mod tests {
table_metadata_manager: &TableMetadataManager,
table_info: RawTableInfo,
region_routes: Vec<RegionRoute>,
region_wal_options: HashMap<RegionNumber, String>,
) -> Result<()> {
table_metadata_manager
.create_table_metadata(
table_info,
TableRouteValue::physical(region_routes),
HashMap::default(),
region_wal_options,
)
.await
}
fn create_mock_region_wal_options() -> HashMap<RegionNumber, WalOptions> {
let topics = (0..2)
.map(|i| format!("greptimedb_topic{}", i))
.collect::<Vec<_>>();
let wal_options = topics
.iter()
.map(|topic| {
WalOptions::Kafka(KafkaWalOptions {
topic: topic.clone(),
})
})
.collect::<Vec<_>>();
(0..16)
.enumerate()
.map(|(i, region_number)| (region_number, wal_options[i % wal_options.len()].clone()))
.collect()
}
#[tokio::test]
async fn test_raft_engine_topic_region_map() {
let mem_kv = Arc::new(MemoryKvBackend::default());
let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
let region_route = new_test_region_route();
let region_routes = &vec![region_route.clone()];
let table_info: RawTableInfo =
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
let wal_allocator = WalOptionsAllocator::RaftEngine;
let regions = (0..16).collect();
let region_wal_options = allocate_region_wal_options(regions, &wal_allocator).unwrap();
create_physical_table_metadata(
&table_metadata_manager,
table_info.clone(),
region_routes.clone(),
region_wal_options.clone(),
)
.await
.unwrap();
let topic_region_key = TOPIC_REGION_PREFIX.to_string();
let range_req = RangeRequest::new().with_prefix(topic_region_key);
let resp = mem_kv.range(range_req).await.unwrap();
// Should be empty because the topic region map is empty for raft engine.
assert!(resp.kvs.is_empty());
}
#[tokio::test]
async fn test_create_table_metadata() {
let mem_kv = Arc::new(MemoryKvBackend::default());
@@ -1416,11 +1496,17 @@ mod tests {
let region_routes = &vec![region_route.clone()];
let table_info: RawTableInfo =
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
let region_wal_options = create_mock_region_wal_options()
.into_iter()
.map(|(k, v)| (k, serde_json::to_string(&v).unwrap()))
.collect::<HashMap<_, _>>();
// creates metadata.
create_physical_table_metadata(
&table_metadata_manager,
table_info.clone(),
region_routes.clone(),
region_wal_options.clone(),
)
.await
.unwrap();
@@ -1430,6 +1516,7 @@ mod tests {
&table_metadata_manager,
table_info.clone(),
region_routes.clone(),
region_wal_options.clone(),
)
.await
.is_ok());
@@ -1440,7 +1527,8 @@ mod tests {
assert!(create_physical_table_metadata(
&table_metadata_manager,
table_info.clone(),
modified_region_routes
modified_region_routes,
region_wal_options.clone(),
)
.await
.is_err());
@@ -1462,6 +1550,19 @@ mod tests {
.unwrap(),
region_routes
);
for i in 0..2 {
let region_number = i as u32;
let region_id = RegionId::new(table_info.ident.table_id, region_number);
let topic = format!("greptimedb_topic{}", i);
let regions = table_metadata_manager
.topic_region_manager
.regions(&topic)
.await
.unwrap();
assert_eq!(regions.len(), 8);
assert_eq!(regions[0], region_id);
}
}
#[tokio::test]
@@ -1557,12 +1658,18 @@ mod tests {
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
let table_id = table_info.ident.table_id;
let datanode_id = 2;
let region_wal_options = create_mock_region_wal_options();
let serialized_region_wal_options = region_wal_options
.iter()
.map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
.collect::<HashMap<_, _>>();
// creates metadata.
create_physical_table_metadata(
&table_metadata_manager,
table_info.clone(),
region_routes.clone(),
serialized_region_wal_options,
)
.await
.unwrap();
@@ -1575,12 +1682,22 @@ mod tests {
let table_route_value = &TableRouteValue::physical(region_routes.clone());
// deletes metadata.
table_metadata_manager
.delete_table_metadata(table_id, &table_name, table_route_value)
.delete_table_metadata(
table_id,
&table_name,
table_route_value,
&region_wal_options,
)
.await
.unwrap();
// Should be ignored.
table_metadata_manager
.delete_table_metadata(table_id, &table_name, table_route_value)
.delete_table_metadata(
table_id,
&table_name,
table_route_value,
&region_wal_options,
)
.await
.unwrap();
assert!(table_metadata_manager
@@ -1617,6 +1734,19 @@ mod tests {
.await
.unwrap();
assert!(table_route.is_none());
// Logical delete removes the topic region mapping as well.
let regions = table_metadata_manager
.topic_region_manager
.regions("greptimedb_topic0")
.await
.unwrap();
assert_eq!(regions.len(), 0);
let regions = table_metadata_manager
.topic_region_manager
.regions("greptimedb_topic1")
.await
.unwrap();
assert_eq!(regions.len(), 0);
}
#[tokio::test]
@@ -1633,6 +1763,7 @@ mod tests {
&table_metadata_manager,
table_info.clone(),
region_routes.clone(),
HashMap::new(),
)
.await
.unwrap();
@@ -1705,6 +1836,7 @@ mod tests {
&table_metadata_manager,
table_info.clone(),
region_routes.clone(),
HashMap::new(),
)
.await
.unwrap();
@@ -1790,6 +1922,7 @@ mod tests {
&table_metadata_manager,
table_info.clone(),
region_routes.clone(),
HashMap::new(),
)
.await
.unwrap();
@@ -1870,6 +2003,7 @@ mod tests {
&table_metadata_manager,
table_info.clone(),
region_routes.clone(),
HashMap::new(),
)
.await
.unwrap();
@@ -1980,7 +2114,11 @@ mod tests {
let table_id = 1025;
let table_name = "foo";
let task = test_create_table_task(table_name, table_id);
let options = [(0, "test".to_string())].into();
let options = create_mock_region_wal_options();
let serialized_options = options
.iter()
.map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
.collect::<HashMap<_, _>>();
table_metadata_manager
.create_table_metadata(
task.table_info,
@@ -2007,7 +2145,7 @@ mod tests {
leader_down_since: None,
},
]),
options,
serialized_options,
)
.await
.unwrap();
@@ -2020,7 +2158,7 @@ mod tests {
.unwrap()
.unwrap();
table_metadata_manager
.destroy_table_metadata(table_id, &table_name, &table_route_value)
.destroy_table_metadata(table_id, &table_name, &table_route_value, &options)
.await
.unwrap();
assert!(mem_kv.is_empty());
@@ -2033,7 +2171,11 @@ mod tests {
let table_id = 1025;
let table_name = "foo";
let task = test_create_table_task(table_name, table_id);
let options = [(0, "test".to_string())].into();
let options = create_mock_region_wal_options();
let serialized_options = options
.iter()
.map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
.collect::<HashMap<_, _>>();
table_metadata_manager
.create_table_metadata(
task.table_info,
@@ -2060,7 +2202,7 @@ mod tests {
leader_down_since: None,
},
]),
options,
serialized_options,
)
.await
.unwrap();
@@ -2076,18 +2218,18 @@ mod tests {
let table_name = TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name);
let table_route_value = TableRouteValue::physical(region_routes.clone());
table_metadata_manager
.delete_table_metadata(table_id, &table_name, &table_route_value)
.delete_table_metadata(table_id, &table_name, &table_route_value, &options)
.await
.unwrap();
table_metadata_manager
.restore_table_metadata(table_id, &table_name, &table_route_value)
.restore_table_metadata(table_id, &table_name, &table_route_value, &options)
.await
.unwrap();
let kvs = mem_kv.dump();
assert_eq!(kvs, expected_result);
// Should be ignored.
table_metadata_manager
.restore_table_metadata(table_id, &table_name, &table_route_value)
.restore_table_metadata(table_id, &table_name, &table_route_value, &options)
.await
.unwrap();
let kvs = mem_kv.dump();


@@ -21,6 +21,7 @@ use snafu::OptionExt;
use store_api::storage::RegionNumber;
use table::metadata::TableId;
use super::table_route::PhysicalTableRouteValue;
use super::MetadataKey;
use crate::error::{DatanodeTableInfoNotFoundSnafu, InvalidMetadataSnafu, Result};
use crate::key::{
@@ -29,7 +30,8 @@ use crate::key::{
use crate::kv_backend::txn::{Txn, TxnOp};
use crate::kv_backend::KvBackendRef;
use crate::range_stream::{PaginationStream, DEFAULT_PAGE_SIZE};
use crate::rpc::store::RangeRequest;
use crate::rpc::router::region_distribution;
use crate::rpc::store::{BatchGetRequest, RangeRequest};
use crate::rpc::KeyValue;
use crate::DatanodeId;
@@ -172,6 +174,26 @@ impl DatanodeTableManager {
Box::pin(stream)
}
/// Find the [DatanodeTableValue]s for the given [TableId] and [PhysicalTableRouteValue].
pub async fn regions(
&self,
table_id: TableId,
table_routes: &PhysicalTableRouteValue,
) -> Result<Vec<DatanodeTableValue>> {
let keys = region_distribution(&table_routes.region_routes)
.into_keys()
.map(|datanode_id| DatanodeTableKey::new(datanode_id, table_id))
.collect::<Vec<_>>();
let req = BatchGetRequest {
keys: keys.iter().map(|k| k.to_bytes()).collect(),
};
let resp = self.kv_backend.batch_get(req).await?;
resp.kvs
.into_iter()
.map(datanode_table_value_decoder)
.collect()
}
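A hedged usage sketch of the lookup added above (it assumes a `datanode_table_manager` handle and an already-resolved `PhysicalTableRouteValue` are in scope, and that `DatanodeTableValue` exposes its hosted regions as `regions`):

    let values = datanode_table_manager
        .regions(table_id, &physical_table_route)
        .await?;
    for value in &values {
        // One entry per datanode that hosts at least one region of `table_id`.
        println!("datanode regions: {:?}", value.regions);
    }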
/// Builds the create datanode table transactions. It only executes when the primary key comparisons succeed.
pub fn build_create_txn(
&self,

View File

@@ -16,9 +16,9 @@ pub mod flow_info;
pub(crate) mod flow_name;
pub(crate) mod flow_route;
pub mod flow_state;
mod flownode_addr_helper;
pub(crate) mod flownode_flow;
pub(crate) mod table_flow;
use std::ops::Deref;
use std::sync::Arc;
@@ -506,7 +506,6 @@ mod tests {
let routes = flow_metadata_manager
.flow_route_manager()
.routes(flow_id)
.try_collect::<Vec<_>>()
.await
.unwrap();
assert_eq!(
@@ -538,7 +537,6 @@ mod tests {
let nodes = flow_metadata_manager
.table_flow_manager()
.flows(table_id)
.try_collect::<Vec<_>>()
.await
.unwrap();
assert_eq!(
@@ -727,7 +725,6 @@ mod tests {
let routes = flow_metadata_manager
.flow_route_manager()
.routes(flow_id)
.try_collect::<Vec<_>>()
.await
.unwrap();
assert_eq!(
@@ -759,7 +756,6 @@ mod tests {
let nodes = flow_metadata_manager
.table_flow_manager()
.flows(table_id)
.try_collect::<Vec<_>>()
.await
.unwrap();
assert_eq!(

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_telemetry::warn;
use futures::stream::BoxStream;
use lazy_static::lazy_static;
use regex::Regex;
@@ -37,6 +38,12 @@ lazy_static! {
"^{FLOW_NAME_KEY_PREFIX}/({NAME_PATTERN})/({NAME_PATTERN})$"
))
.unwrap();
/// For compatibility with older flow names that used a less strict name pattern.
static ref COMPAT_FLOW_NAME_KEY_PATTERN: Regex = Regex::new(&format!(
"^{FLOW_NAME_KEY_PREFIX}/({NAME_PATTERN})/(.*)$"
))
.unwrap();
}
/// The key of mapping {flow_name} to [FlowId].
@@ -114,12 +121,18 @@ impl<'a> MetadataKey<'a, FlowNameKeyInner<'a>> for FlowNameKeyInner<'_> {
}
.build()
})?;
let captures =
FLOW_NAME_KEY_PATTERN
.captures(key)
.context(error::InvalidMetadataSnafu {
err_msg: format!("Invalid FlowNameKeyInner '{key}'"),
})?;
let captures = FLOW_NAME_KEY_PATTERN
.captures(key)
.or_else(|| {
warn!(
"FlowNameKeyInner '{}' is not a valid flow name in newer version.",
key
);
COMPAT_FLOW_NAME_KEY_PATTERN.captures(key)
})
.context(error::InvalidMetadataSnafu {
err_msg: format!("Invalid FlowNameKeyInner '{key}'"),
})?;
// Safety: the key matched one of the regex patterns above
let catalog_name = captures.get(1).unwrap().as_str();
let flow_name = captures.get(2).unwrap().as_str();
@@ -284,6 +297,12 @@ mod tests {
let key = FlowNameKey::from_bytes(&bytes).unwrap();
assert_eq!(key.catalog(), "my_catalog");
assert_eq!(key.flow_name(), "my_task");
// compatibility with older version
let bytes = b"__flow/name/my_catalog/a/`b`".to_vec();
let key = FlowNameKey::from_bytes(&bytes).unwrap();
assert_eq!(key.catalog(), "my_catalog");
assert_eq!(key.flow_name(), "a/`b`");
}
#[test]
fn test_key_start_range() {

View File

@@ -12,14 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use futures::stream::BoxStream;
use futures::TryStreamExt;
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize};
use snafu::OptionExt;
use crate::error::{self, Result};
use crate::key::flow::FlowScoped;
use crate::key::flow::{flownode_addr_helper, FlowScoped};
use crate::key::node_address::NodeAddressKey;
use crate::key::{BytesAdapter, FlowId, FlowPartitionId, MetadataKey, MetadataValue};
use crate::kv_backend::txn::{Txn, TxnOp};
use crate::kv_backend::KvBackendRef;
@@ -167,10 +168,7 @@ impl FlowRouteManager {
}
/// Retrieves all [FlowRouteValue]s of the specified `flow_id`.
pub fn routes(
&self,
flow_id: FlowId,
) -> BoxStream<'static, Result<(FlowRouteKey, FlowRouteValue)>> {
pub async fn routes(&self, flow_id: FlowId) -> Result<Vec<(FlowRouteKey, FlowRouteValue)>> {
let start_key = FlowRouteKey::range_start_key(flow_id);
let req = RangeRequest::new().with_prefix(start_key);
let stream = PaginationStream::new(
@@ -181,7 +179,9 @@ impl FlowRouteManager {
)
.into_stream();
Box::pin(stream)
let mut res = stream.try_collect::<Vec<_>>().await?;
self.remap_flow_route_addresses(&mut res).await?;
Ok(res)
}
/// Builds a create flow routes transaction.
@@ -203,6 +203,28 @@ impl FlowRouteManager {
Ok(Txn::new().and_then(txns))
}
async fn remap_flow_route_addresses(
&self,
flow_routes: &mut [(FlowRouteKey, FlowRouteValue)],
) -> Result<()> {
let keys = flow_routes
.iter()
.map(|(_, value)| NodeAddressKey::with_flownode(value.peer.id))
.collect();
let flow_node_addrs =
flownode_addr_helper::get_flownode_addresses(&self.kv_backend, keys).await?;
for (_, flow_route_value) in flow_routes.iter_mut() {
let flownode_id = flow_route_value.peer.id;
// If an id lacks a corresponding address in `flow_node_addrs`,
// the old address stored in `flow_route_value` is still valid,
// which is expected.
if let Some(node_addr) = flow_node_addrs.get(&flownode_id) {
flow_route_value.peer.addr = node_addr.peer.addr.clone();
}
}
Ok(())
}
}
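A hedged consumer-side sketch of the reworked API (assuming a `flow_route_manager` handle and that `FlowRouteKey` derives `Debug`); note that `routes` is now async and returns a fully collected, address-remapped `Vec` instead of a stream:

    let routes: Vec<(FlowRouteKey, FlowRouteValue)> =
        flow_route_manager.routes(flow_id).await?;
    for (key, value) in &routes {
        // `value.peer.addr` already reflects the latest reported flownode address.
        println!("flow route {:?} -> {}", key, value.peer.addr);
    }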
#[cfg(test)]

View File

@@ -0,0 +1,47 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use crate::error::Result;
use crate::key::node_address::{NodeAddressKey, NodeAddressValue};
use crate::key::{MetadataKey, MetadataValue};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::BatchGetRequest;
/// Get the addresses of the flownodes.
/// The result is a map: node_id -> NodeAddressValue
pub(crate) async fn get_flownode_addresses(
kv_backend: &KvBackendRef,
keys: Vec<NodeAddressKey>,
) -> Result<HashMap<u64, NodeAddressValue>> {
if keys.is_empty() {
return Ok(HashMap::default());
}
let req = BatchGetRequest {
keys: keys.into_iter().map(|k| k.to_bytes()).collect(),
};
kv_backend
.batch_get(req)
.await?
.kvs
.into_iter()
.map(|kv| {
let key = NodeAddressKey::from_bytes(&kv.key)?;
let value = NodeAddressValue::try_from_raw_value(&kv.value)?;
Ok((key.node_id, value))
})
.collect()
}
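A hedged usage sketch of the helper above (node ids are illustrative; `kv_backend` is assumed to be a `KvBackendRef` in scope):

    let keys = vec![
        NodeAddressKey::with_flownode(1),
        NodeAddressKey::with_flownode(2),
    ];
    let addrs = get_flownode_addresses(&kv_backend, keys).await?;
    if let Some(node_addr) = addrs.get(&1) {
        // `node_addr.peer.addr` is the most recently reported address of flownode 1;
        // ids missing from the map simply keep their previously stored address.
        println!("flownode 1 -> {}", node_addr.peer.addr);
    }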

View File

@@ -14,7 +14,7 @@
use std::sync::Arc;
use futures::stream::BoxStream;
use futures::TryStreamExt;
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize};
@@ -22,7 +22,8 @@ use snafu::OptionExt;
use table::metadata::TableId;
use crate::error::{self, Result};
use crate::key::flow::FlowScoped;
use crate::key::flow::{flownode_addr_helper, FlowScoped};
use crate::key::node_address::NodeAddressKey;
use crate::key::{BytesAdapter, FlowId, FlowPartitionId, MetadataKey, MetadataValue};
use crate::kv_backend::txn::{Txn, TxnOp};
use crate::kv_backend::KvBackendRef;
@@ -196,10 +197,7 @@ impl TableFlowManager {
/// Retrieves all [TableFlowKey]s of the specified `table_id`.
///
/// TODO(discord9): add cache for it since range request does not support cache.
pub fn flows(
&self,
table_id: TableId,
) -> BoxStream<'static, Result<(TableFlowKey, TableFlowValue)>> {
pub async fn flows(&self, table_id: TableId) -> Result<Vec<(TableFlowKey, TableFlowValue)>> {
let start_key = TableFlowKey::range_start_key(table_id);
let req = RangeRequest::new().with_prefix(start_key);
let stream = PaginationStream::new(
@@ -210,7 +208,9 @@ impl TableFlowManager {
)
.into_stream();
Box::pin(stream)
let mut res = stream.try_collect::<Vec<_>>().await?;
self.remap_table_flow_addresses(&mut res).await?;
Ok(res)
}
/// Builds a create table flow transaction.
@@ -238,6 +238,28 @@ impl TableFlowManager {
Ok(Txn::new().and_then(txns))
}
async fn remap_table_flow_addresses(
&self,
table_flows: &mut [(TableFlowKey, TableFlowValue)],
) -> Result<()> {
let keys = table_flows
.iter()
.map(|(_, value)| NodeAddressKey::with_flownode(value.peer.id))
.collect::<Vec<_>>();
let flownode_addrs =
flownode_addr_helper::get_flownode_addresses(&self.kv_backend, keys).await?;
for (_, table_flow_value) in table_flows.iter_mut() {
let flownode_id = table_flow_value.peer.id;
// If an id lacks a corresponding address in `flownode_addrs`,
// the old address stored in `table_flow_value` is still valid,
// which is expected.
if let Some(flownode_addr) = flownode_addrs.get(&flownode_id) {
table_flow_value.peer.addr = flownode_addr.peer.addr.clone();
}
}
Ok(())
}
}
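A hedged sketch of one way a caller might consume the collected result, e.g. grouping flow partitions by the flownode that currently hosts them (assumes `std::collections::HashMap` is imported and a `table_flow_manager` handle is in scope):

    let flows = table_flow_manager.flows(table_id).await?;
    let mut by_flownode: HashMap<u64, Vec<TableFlowKey>> = HashMap::new();
    for (key, value) in flows {
        // Addresses in `value.peer` are already remapped, so grouping by peer id is stable.
        by_flownode.entry(value.peer.id).or_default().push(key);
    }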
#[cfg(test)]

View File

@@ -39,6 +39,10 @@ impl NodeAddressKey {
pub fn with_datanode(node_id: u64) -> Self {
Self::new(Role::Datanode, node_id)
}
pub fn with_flownode(node_id: u64) -> Self {
Self::new(Role::Flownode, node_id)
}
}
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]

View File

@@ -26,18 +26,25 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::fmt::{self, Display};
use common_wal::options::WalOptions;
use serde::{Deserialize, Serialize};
use snafu::OptionExt;
use store_api::storage::RegionId;
use store_api::storage::{RegionId, RegionNumber};
use table::metadata::TableId;
use crate::ddl::utils::parse_region_wal_options;
use crate::error::{Error, InvalidMetadataSnafu, Result};
use crate::key::{MetadataKey, TOPIC_REGION_PATTERN, TOPIC_REGION_PREFIX};
use crate::kv_backend::txn::{Txn, TxnOp};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::{BatchPutRequest, PutRequest, RangeRequest};
use crate::rpc::store::{BatchDeleteRequest, BatchPutRequest, PutRequest, RangeRequest};
use crate::rpc::KeyValue;
// The TopicRegionKey is a key for the topic-region mapping in the kvbackend.
// The layout of the key is `__topic_region/{topic_name}/{region_id}`.
#[derive(Debug, Clone, PartialEq)]
pub struct TopicRegionKey<'a> {
pub region_id: RegionId,
@@ -53,7 +60,7 @@ impl<'a> TopicRegionKey<'a> {
}
pub fn range_topic_key(topic: &str) -> String {
format!("{}/{}", TOPIC_REGION_PREFIX, topic)
format!("{}/{}/", TOPIC_REGION_PREFIX, topic)
}
}
@@ -80,7 +87,7 @@ impl Display for TopicRegionKey<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{}/{}",
"{}{}",
Self::range_topic_key(self.topic),
self.region_id.as_u64()
)
@@ -151,6 +158,24 @@ impl TopicRegionManager {
Ok(())
}
pub fn build_create_txn(
&self,
table_id: TableId,
region_wal_options: &HashMap<RegionNumber, String>,
) -> Result<Txn> {
let region_wal_options = parse_region_wal_options(region_wal_options)?;
let topic_region_mapping = self.get_topic_region_mapping(table_id, &region_wal_options);
let topic_region_keys = topic_region_mapping
.iter()
.map(|(region_id, topic)| TopicRegionKey::new(*region_id, topic))
.collect::<Vec<_>>();
let operations = topic_region_keys
.into_iter()
.map(|key| TxnOp::Put(key.to_bytes(), vec![]))
.collect::<Vec<_>>();
Ok(Txn::new().and_then(operations))
}
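A hedged sketch of how the new transaction builder might be used during table creation (assumes a `topic_region_manager` handle, serialized WAL options in the form stored in table metadata, and a kv backend that implements `TxnService`):

    // `region_wal_options` maps RegionNumber -> serialized WalOptions JSON string.
    let txn = topic_region_manager.build_create_txn(table_id, &region_wal_options)?;
    // The transaction puts one empty-valued `__topic_region/{topic}/{region_id}` key
    // per Kafka-backed region; regions on the RaftEngine WAL are skipped.
    let _resp = kv_backend.txn(txn).await?;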
/// Returns the list of region ids using the specified topic.
pub async fn regions(&self, topic: &str) -> Result<Vec<RegionId>> {
let prefix = TopicRegionKey::range_topic_key(topic);
@@ -169,12 +194,49 @@ impl TopicRegionManager {
self.kv_backend.delete(&raw_key, false).await?;
Ok(())
}
pub async fn batch_delete(&self, keys: Vec<TopicRegionKey<'_>>) -> Result<()> {
let raw_keys = keys.iter().map(|key| key.to_bytes()).collect::<Vec<_>>();
let req = BatchDeleteRequest {
keys: raw_keys,
prev_kv: false,
};
self.kv_backend.batch_delete(req).await?;
Ok(())
}
/// Retrieves a mapping of [`RegionId`]s to their corresponding topic names
/// based on the provided table ID and WAL options.
///
/// # Returns
/// A vector of tuples, where each tuple contains a [`RegionId`] and its corresponding topic name.
pub fn get_topic_region_mapping<'a>(
&self,
table_id: TableId,
region_wal_options: &'a HashMap<RegionNumber, WalOptions>,
) -> Vec<(RegionId, &'a str)> {
region_wal_options
.keys()
.filter_map(
|region_number| match region_wal_options.get(region_number) {
Some(WalOptions::Kafka(kafka)) => {
let region_id = RegionId::new(table_id, *region_number);
Some((region_id, kafka.topic.as_str()))
}
Some(WalOptions::RaftEngine) => None,
None => None,
},
)
.collect::<Vec<_>>()
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_wal::options::KafkaWalOptions;
use super::*;
use crate::kv_backend::memory::MemoryKvBackend;
@@ -220,4 +282,45 @@ mod tests {
key_values.sort_by_key(|id| id.as_u64());
assert_eq!(key_values, expected);
}
#[test]
fn test_topic_region_map() {
let kv_backend = Arc::new(MemoryKvBackend::default());
let manager = TopicRegionManager::new(kv_backend.clone());
let table_id = 1;
let region_wal_options = (0..64)
.map(|i| {
let region_number = i;
let wal_options = if i % 2 == 0 {
WalOptions::Kafka(KafkaWalOptions {
topic: format!("topic_{}", i),
})
} else {
WalOptions::RaftEngine
};
(region_number, serde_json::to_string(&wal_options).unwrap())
})
.collect::<HashMap<_, _>>();
let region_wal_options = parse_region_wal_options(&region_wal_options).unwrap();
let mut topic_region_mapping =
manager.get_topic_region_mapping(table_id, &region_wal_options);
let mut expected = (0..64)
.filter_map(|i| {
if i % 2 == 0 {
Some((RegionId::new(table_id, i), format!("topic_{}", i)))
} else {
None
}
})
.collect::<Vec<_>>();
topic_region_mapping.sort_by_key(|(region_id, _)| region_id.as_u64());
let topic_region_map = topic_region_mapping
.iter()
.map(|(region_id, topic)| (*region_id, topic.to_string()))
.collect::<Vec<_>>();
expected.sort_by_key(|(region_id, _)| region_id.as_u64());
assert_eq!(topic_region_map, expected);
}
}

View File

@@ -32,7 +32,7 @@ pub mod chroot;
pub mod etcd;
pub mod memory;
#[cfg(feature = "pg_kvbackend")]
pub mod postgres;
pub mod rds;
pub mod test;
pub mod txn;

File diff suppressed because it is too large

View File

@@ -0,0 +1,548 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::collections::HashMap;
use std::marker::PhantomData;
use std::time::Duration;
use backon::{BackoffBuilder, ExponentialBuilder};
use common_telemetry::debug;
use crate::error::{Error, RdsTransactionRetryFailedSnafu, Result};
use crate::kv_backend::txn::{
Compare, Txn as KvTxn, TxnOp, TxnOpResponse, TxnResponse as KvTxnResponse,
};
use crate::kv_backend::{KvBackend, TxnService};
use crate::metrics::METRIC_META_TXN_REQUEST;
use crate::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, PutRequest, PutResponse,
RangeRequest, RangeResponse,
};
use crate::rpc::KeyValue;
mod postgres;
pub use postgres::PgStore;
const RDS_STORE_TXN_RETRY_COUNT: usize = 3;
/// Query executor for rds. It can execute queries or generate a transaction executor.
#[async_trait::async_trait]
pub trait Executor: Send + Sync {
type Transaction<'a>: 'a + Transaction<'a>
where
Self: 'a;
fn name() -> &'static str;
async fn query(&mut self, query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>>;
/// Some queries don't need to return any result, such as `DELETE`.
async fn execute(&mut self, query: &str, params: &[&Vec<u8>]) -> Result<()> {
self.query(query, params).await?;
Ok(())
}
async fn txn_executor<'a>(&'a mut self) -> Result<Self::Transaction<'a>>;
}
/// Transaction query executor for rds. It can execute queries in transaction or commit the transaction.
#[async_trait::async_trait]
pub trait Transaction<'a>: Send + Sync {
async fn query(&mut self, query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>>;
async fn execute(&mut self, query: &str, params: &[&Vec<u8>]) -> Result<()> {
self.query(query, params).await?;
Ok(())
}
async fn commit(self) -> Result<()>;
}
/// Factory for creating default and transaction query executors.
#[async_trait::async_trait]
pub trait ExecutorFactory<T: Executor>: Send + Sync {
async fn default_executor(&self) -> Result<T>;
async fn txn_executor<'a>(&self, default_executor: &'a mut T) -> Result<T::Transaction<'a>>;
}
/// RDS-backed store for metasrv
pub struct RdsStore<T, S, R>
where
T: Executor + Send + Sync,
S: ExecutorFactory<T> + Send + Sync,
{
max_txn_ops: usize,
txn_retry_count: usize,
executor_factory: S,
sql_template_set: R,
_phantom: PhantomData<T>,
}
pub enum ExecutorImpl<'a, T: Executor + 'a> {
Default(T),
Txn(T::Transaction<'a>),
}
impl<T: Executor> ExecutorImpl<'_, T> {
async fn query(&mut self, query: &str, params: &Vec<&Vec<u8>>) -> Result<Vec<KeyValue>> {
match self {
Self::Default(executor) => executor.query(query, params).await,
Self::Txn(executor) => executor.query(query, params).await,
}
}
async fn commit(self) -> Result<()> {
match self {
Self::Txn(executor) => executor.commit().await,
_ => Ok(()),
}
}
}
#[async_trait::async_trait]
pub trait KvQueryExecutor<T: Executor> {
async fn range_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, T>,
req: RangeRequest,
) -> Result<RangeResponse>;
async fn put_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, T>,
req: PutRequest,
) -> Result<PutResponse> {
let kv = KeyValue {
key: req.key,
value: req.value,
};
let mut res = self
.batch_put_with_query_executor(
query_executor,
BatchPutRequest {
kvs: vec![kv],
prev_kv: req.prev_kv,
},
)
.await?;
if !res.prev_kvs.is_empty() {
debug_assert!(req.prev_kv);
return Ok(PutResponse {
prev_kv: Some(res.prev_kvs.remove(0)),
});
}
Ok(PutResponse::default())
}
async fn batch_put_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, T>,
req: BatchPutRequest,
) -> Result<BatchPutResponse>;
/// Batch get with a given executor. Needed when the executor runs inside a transaction.
async fn batch_get_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, T>,
req: BatchGetRequest,
) -> Result<BatchGetResponse>;
async fn delete_range_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, T>,
req: DeleteRangeRequest,
) -> Result<DeleteRangeResponse>;
async fn batch_delete_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, T>,
req: BatchDeleteRequest,
) -> Result<BatchDeleteResponse>;
}
impl<T, S, R> RdsStore<T, S, R>
where
Self: KvQueryExecutor<T> + Send + Sync,
T: Executor + Send + Sync,
S: ExecutorFactory<T> + Send + Sync,
{
async fn execute_txn_cmp(
&self,
query_executor: &mut ExecutorImpl<'_, T>,
cmp: &[Compare],
) -> Result<bool> {
let batch_get_req = BatchGetRequest {
keys: cmp.iter().map(|c| c.key.clone()).collect(),
};
let res = self
.batch_get_with_query_executor(query_executor, batch_get_req)
.await?;
debug!("batch get res: {:?}", res);
let res_map = res
.kvs
.into_iter()
.map(|kv| (kv.key, kv.value))
.collect::<HashMap<Vec<u8>, Vec<u8>>>();
for c in cmp {
let value = res_map.get(&c.key);
if !c.compare_value(value) {
return Ok(false);
}
}
Ok(true)
}
/// Executes a batch of transaction operations in one round trip. It only applies when all operations share the same type; otherwise it returns `None` and the caller falls back to per-operation execution.
async fn try_batch_txn(
&self,
query_executor: &mut ExecutorImpl<'_, T>,
txn_ops: &[TxnOp],
) -> Result<Option<Vec<TxnOpResponse>>> {
if !check_txn_ops(txn_ops)? {
return Ok(None);
}
// Safety: txn_ops is not empty
match txn_ops.first().unwrap() {
TxnOp::Delete(_) => self.handle_batch_delete(query_executor, txn_ops).await,
TxnOp::Put(_, _) => self.handle_batch_put(query_executor, txn_ops).await,
TxnOp::Get(_) => self.handle_batch_get(query_executor, txn_ops).await,
}
}
async fn handle_batch_delete(
&self,
query_executor: &mut ExecutorImpl<'_, T>,
txn_ops: &[TxnOp],
) -> Result<Option<Vec<TxnOpResponse>>> {
let mut batch_del_req = BatchDeleteRequest {
keys: vec![],
prev_kv: true,
};
for op in txn_ops {
if let TxnOp::Delete(key) = op {
batch_del_req.keys.push(key.clone());
}
}
let res = self
.batch_delete_with_query_executor(query_executor, batch_del_req)
.await?;
let res_map = res
.prev_kvs
.into_iter()
.map(|kv| (kv.key, kv.value))
.collect::<HashMap<Vec<u8>, Vec<u8>>>();
let mut resps = Vec::with_capacity(txn_ops.len());
for op in txn_ops {
if let TxnOp::Delete(key) = op {
let value = res_map.get(key);
resps.push(TxnOpResponse::ResponseDelete(DeleteRangeResponse {
deleted: if value.is_some() { 1 } else { 0 },
prev_kvs: vec![],
}));
}
}
Ok(Some(resps))
}
async fn handle_batch_put(
&self,
query_executor: &mut ExecutorImpl<'_, T>,
txn_ops: &[TxnOp],
) -> Result<Option<Vec<TxnOpResponse>>> {
let mut batch_put_req = BatchPutRequest {
kvs: vec![],
prev_kv: false,
};
for op in txn_ops {
if let TxnOp::Put(key, value) = op {
batch_put_req.kvs.push(KeyValue {
key: key.clone(),
value: value.clone(),
});
}
}
let _ = self
.batch_put_with_query_executor(query_executor, batch_put_req)
.await?;
let mut resps = Vec::with_capacity(txn_ops.len());
for op in txn_ops {
if let TxnOp::Put(_, _) = op {
resps.push(TxnOpResponse::ResponsePut(PutResponse { prev_kv: None }));
}
}
Ok(Some(resps))
}
async fn handle_batch_get(
&self,
query_executor: &mut ExecutorImpl<'_, T>,
txn_ops: &[TxnOp],
) -> Result<Option<Vec<TxnOpResponse>>> {
let mut batch_get_req = BatchGetRequest { keys: vec![] };
for op in txn_ops {
if let TxnOp::Get(key) = op {
batch_get_req.keys.push(key.clone());
}
}
let res = self
.batch_get_with_query_executor(query_executor, batch_get_req)
.await?;
let res_map = res
.kvs
.into_iter()
.map(|kv| (kv.key, kv.value))
.collect::<HashMap<Vec<u8>, Vec<u8>>>();
let mut resps = Vec::with_capacity(txn_ops.len());
for op in txn_ops {
if let TxnOp::Get(key) = op {
let value = res_map.get(key);
resps.push(TxnOpResponse::ResponseGet(RangeResponse {
kvs: value
.map(|v| {
vec![KeyValue {
key: key.clone(),
value: v.clone(),
}]
})
.unwrap_or_default(),
more: false,
}));
}
}
Ok(Some(resps))
}
async fn execute_txn_op(
&self,
query_executor: &mut ExecutorImpl<'_, T>,
op: &TxnOp,
) -> Result<TxnOpResponse> {
match op {
TxnOp::Put(key, value) => {
let res = self
.put_with_query_executor(
query_executor,
PutRequest {
key: key.clone(),
value: value.clone(),
prev_kv: false,
},
)
.await?;
Ok(TxnOpResponse::ResponsePut(res))
}
TxnOp::Get(key) => {
let res = self
.range_with_query_executor(
query_executor,
RangeRequest {
key: key.clone(),
range_end: vec![],
limit: 1,
keys_only: false,
},
)
.await?;
Ok(TxnOpResponse::ResponseGet(res))
}
TxnOp::Delete(key) => {
let res = self
.delete_range_with_query_executor(
query_executor,
DeleteRangeRequest {
key: key.clone(),
range_end: vec![],
prev_kv: false,
},
)
.await?;
Ok(TxnOpResponse::ResponseDelete(res))
}
}
}
async fn txn_inner(&self, txn: &KvTxn) -> Result<KvTxnResponse> {
let mut default_executor = self.executor_factory.default_executor().await?;
let mut txn_executor = ExecutorImpl::Txn(
self.executor_factory
.txn_executor(&mut default_executor)
.await?,
);
let mut success = true;
if txn.c_when {
success = self
.execute_txn_cmp(&mut txn_executor, &txn.req.compare)
.await?;
}
let mut responses = vec![];
if success && txn.c_then {
match self
.try_batch_txn(&mut txn_executor, &txn.req.success)
.await?
{
Some(res) => responses.extend(res),
None => {
for txnop in &txn.req.success {
let res = self.execute_txn_op(&mut txn_executor, txnop).await?;
responses.push(res);
}
}
}
} else if !success && txn.c_else {
match self
.try_batch_txn(&mut txn_executor, &txn.req.failure)
.await?
{
Some(res) => responses.extend(res),
None => {
for txnop in &txn.req.failure {
let res = self.execute_txn_op(&mut txn_executor, txnop).await?;
responses.push(res);
}
}
}
}
txn_executor.commit().await?;
Ok(KvTxnResponse {
responses,
succeeded: success,
})
}
}
#[async_trait::async_trait]
impl<T, S, R> KvBackend for RdsStore<T, S, R>
where
R: 'static,
Self: KvQueryExecutor<T> + Send + Sync,
T: Executor + 'static,
S: ExecutorFactory<T> + 'static,
{
fn name(&self) -> &str {
T::name()
}
fn as_any(&self) -> &dyn Any {
self
}
async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
let client = self.executor_factory.default_executor().await?;
let mut query_executor = ExecutorImpl::Default(client);
self.range_with_query_executor(&mut query_executor, req)
.await
}
async fn put(&self, req: PutRequest) -> Result<PutResponse> {
let client = self.executor_factory.default_executor().await?;
let mut query_executor = ExecutorImpl::Default(client);
self.put_with_query_executor(&mut query_executor, req).await
}
async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
let client = self.executor_factory.default_executor().await?;
let mut query_executor = ExecutorImpl::Default(client);
self.batch_put_with_query_executor(&mut query_executor, req)
.await
}
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
let client = self.executor_factory.default_executor().await?;
let mut query_executor = ExecutorImpl::Default(client);
self.batch_get_with_query_executor(&mut query_executor, req)
.await
}
async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
let client = self.executor_factory.default_executor().await?;
let mut query_executor = ExecutorImpl::Default(client);
self.delete_range_with_query_executor(&mut query_executor, req)
.await
}
async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
let client = self.executor_factory.default_executor().await?;
let mut query_executor = ExecutorImpl::Default(client);
self.batch_delete_with_query_executor(&mut query_executor, req)
.await
}
}
#[async_trait::async_trait]
impl<T, S, R> TxnService for RdsStore<T, S, R>
where
Self: KvQueryExecutor<T> + Send + Sync,
T: Executor + 'static,
S: ExecutorFactory<T> + 'static,
{
type Error = Error;
async fn txn(&self, txn: KvTxn) -> Result<KvTxnResponse> {
let _timer = METRIC_META_TXN_REQUEST
.with_label_values(&[T::name(), "txn"])
.start_timer();
let mut backoff = ExponentialBuilder::default()
.with_min_delay(Duration::from_millis(10))
.with_max_delay(Duration::from_millis(200))
.with_max_times(self.txn_retry_count)
.build();
loop {
match self.txn_inner(&txn).await {
Ok(res) => return Ok(res),
Err(e) => {
if e.is_serialization_error() {
let d = backoff.next();
if let Some(d) = d {
tokio::time::sleep(d).await;
continue;
}
break;
} else {
return Err(e);
}
}
}
}
RdsTransactionRetryFailedSnafu {}.fail()
}
fn max_txn_ops(&self) -> usize {
self.max_txn_ops
}
}
/// Checks whether all transaction operations are of the same type; empty batches return `false`.
fn check_txn_ops(txn_ops: &[TxnOp]) -> Result<bool> {
if txn_ops.is_empty() {
return Ok(false);
}
let same = txn_ops.windows(2).all(|a| {
matches!(
(&a[0], &a[1]),
(TxnOp::Put(_, _), TxnOp::Put(_, _))
| (TxnOp::Get(_), TxnOp::Get(_))
| (TxnOp::Delete(_), TxnOp::Delete(_))
)
});
Ok(same)
}
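For illustration, the homogeneity check behaves as follows (a small hedged sketch):

    let puts = vec![
        TxnOp::Put(b"k1".to_vec(), b"v1".to_vec()),
        TxnOp::Put(b"k2".to_vec(), b"v2".to_vec()),
    ];
    assert!(check_txn_ops(&puts).unwrap()); // all puts: eligible for the batched fast path
    let mixed = vec![
        TxnOp::Put(b"k1".to_vec(), b"v1".to_vec()),
        TxnOp::Delete(b"k2".to_vec()),
    ];
    assert!(!check_txn_ops(&mixed).unwrap()); // mixed ops fall back to per-op execution
    assert!(!check_txn_ops(&[]).unwrap()); // empty batches are never fast-pathed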

View File

@@ -0,0 +1,624 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::marker::PhantomData;
use std::sync::Arc;
use common_telemetry::debug;
use deadpool_postgres::{Config, Pool, Runtime};
use snafu::ResultExt;
use tokio_postgres::types::ToSql;
use tokio_postgres::{IsolationLevel, NoTls, Row};
use crate::error::{
CreatePostgresPoolSnafu, GetPostgresConnectionSnafu, PostgresExecutionSnafu,
PostgresTransactionSnafu, Result,
};
use crate::kv_backend::rds::{
Executor, ExecutorFactory, ExecutorImpl, KvQueryExecutor, RdsStore, Transaction,
RDS_STORE_TXN_RETRY_COUNT,
};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, RangeRequest, RangeResponse,
};
use crate::rpc::KeyValue;
pub struct PgClient(deadpool::managed::Object<deadpool_postgres::Manager>);
pub struct PgTxnClient<'a>(deadpool_postgres::Transaction<'a>);
/// Converts a row to a [`KeyValue`].
fn key_value_from_row(r: Row) -> KeyValue {
KeyValue {
key: r.get(0),
value: r.get(1),
}
}
const EMPTY: &[u8] = &[0];
/// Type of range template.
#[derive(Debug, Clone, Copy)]
enum RangeTemplateType {
Point,
Range,
Full,
LeftBounded,
Prefix,
}
/// Builds params for the given range template type.
impl RangeTemplateType {
fn build_params(&self, mut key: Vec<u8>, range_end: Vec<u8>) -> Vec<Vec<u8>> {
match self {
RangeTemplateType::Point => vec![key],
RangeTemplateType::Range => vec![key, range_end],
RangeTemplateType::Full => vec![],
RangeTemplateType::LeftBounded => vec![key],
RangeTemplateType::Prefix => {
key.push(b'%');
vec![key]
}
}
}
}
/// Templates for range request.
#[derive(Debug, Clone)]
struct RangeTemplate {
point: String,
range: String,
full: String,
left_bounded: String,
prefix: String,
}
impl RangeTemplate {
/// Gets the template for the given type.
fn get(&self, typ: RangeTemplateType) -> &str {
match typ {
RangeTemplateType::Point => &self.point,
RangeTemplateType::Range => &self.range,
RangeTemplateType::Full => &self.full,
RangeTemplateType::LeftBounded => &self.left_bounded,
RangeTemplateType::Prefix => &self.prefix,
}
}
/// Adds limit to the template.
fn with_limit(template: &str, limit: i64) -> String {
if limit == 0 {
return format!("{};", template);
}
format!("{} LIMIT {};", template, limit)
}
}
fn is_prefix_range(start: &[u8], end: &[u8]) -> bool {
if start.len() != end.len() {
return false;
}
let l = start.len();
let same_prefix = start[0..l - 1] == end[0..l - 1];
if let (Some(rhs), Some(lhs)) = (start.last(), end.last()) {
return same_prefix && (*rhs + 1) == *lhs;
}
false
}
/// Determine the template type for range request.
fn range_template(key: &[u8], range_end: &[u8]) -> RangeTemplateType {
match (key, range_end) {
(_, &[]) => RangeTemplateType::Point,
(EMPTY, EMPTY) => RangeTemplateType::Full,
(_, EMPTY) => RangeTemplateType::LeftBounded,
(start, end) => {
if is_prefix_range(start, end) {
RangeTemplateType::Prefix
} else {
RangeTemplateType::Range
}
}
}
}
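For illustration, prefix scans arrive as a `range_end` equal to the key with its final byte incremented (the convention used by `RangeRequest::with_prefix`), which is exactly what `is_prefix_range` detects. A small hedged sketch:

    assert!(is_prefix_range(b"__table/", b"__table0")); // '/' + 1 == '0'
    assert!(!is_prefix_range(b"__table/", b"__tablf/")); // differs before the last byte
    assert!(matches!(
        range_template(b"__table/", b"__table0"),
        RangeTemplateType::Prefix
    ));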
/// Generate in placeholders for PostgreSQL.
fn pg_generate_in_placeholders(from: usize, to: usize) -> Vec<String> {
(from..=to).map(|i| format!("${}", i)).collect()
}
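For example:

    assert_eq!(
        pg_generate_in_placeholders(1, 3),
        vec!["$1".to_string(), "$2".to_string(), "$3".to_string()]
    );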
/// Factory for building sql templates.
struct PgSqlTemplateFactory<'a> {
table_name: &'a str,
}
impl<'a> PgSqlTemplateFactory<'a> {
/// Creates a new [`PgSqlTemplateFactory`] with the given table name.
fn new(table_name: &'a str) -> Self {
Self { table_name }
}
/// Builds the template set for the given table name.
fn build(&self) -> PgSqlTemplateSet {
let table_name = self.table_name;
PgSqlTemplateSet {
table_name: table_name.to_string(),
create_table_statement: format!(
"CREATE TABLE IF NOT EXISTS {table_name}(k bytea PRIMARY KEY, v bytea)",
),
range_template: RangeTemplate {
point: format!("SELECT k, v FROM {table_name} WHERE k = $1"),
range: format!("SELECT k, v FROM {table_name} WHERE k >= $1 AND k < $2 ORDER BY k"),
full: format!("SELECT k, v FROM {table_name} $1 ORDER BY k"),
left_bounded: format!("SELECT k, v FROM {table_name} WHERE k >= $1 ORDER BY k"),
prefix: format!("SELECT k, v FROM {table_name} WHERE k LIKE $1 ORDER BY k"),
},
delete_template: RangeTemplate {
point: format!("DELETE FROM {table_name} WHERE k = $1 RETURNING k,v;"),
range: format!("DELETE FROM {table_name} WHERE k >= $1 AND k < $2 RETURNING k,v;"),
full: format!("DELETE FROM {table_name} RETURNING k,v"),
left_bounded: format!("DELETE FROM {table_name} WHERE k >= $1 RETURNING k,v;"),
prefix: format!("DELETE FROM {table_name} WHERE k LIKE $1 RETURNING k,v;"),
},
}
}
}
/// Templates for the given table name.
#[derive(Debug, Clone)]
pub struct PgSqlTemplateSet {
table_name: String,
create_table_statement: String,
range_template: RangeTemplate,
delete_template: RangeTemplate,
}
impl PgSqlTemplateSet {
/// Generates the sql for batch get.
fn generate_batch_get_query(&self, key_len: usize) -> String {
let table_name = &self.table_name;
let in_clause = pg_generate_in_placeholders(1, key_len).join(", ");
format!("SELECT k, v FROM {table_name} WHERE k in ({});", in_clause)
}
/// Generates the sql for batch delete.
fn generate_batch_delete_query(&self, key_len: usize) -> String {
let table_name = &self.table_name;
let in_clause = pg_generate_in_placeholders(1, key_len).join(", ");
format!(
"DELETE FROM {table_name} WHERE k in ({}) RETURNING k,v;",
in_clause
)
}
/// Generates the sql for batch upsert.
fn generate_batch_upsert_query(&self, kv_len: usize) -> String {
let table_name = &self.table_name;
let in_placeholders: Vec<String> = (1..=kv_len).map(|i| format!("${}", i)).collect();
let in_clause = in_placeholders.join(", ");
let mut param_index = kv_len + 1;
let mut values_placeholders = Vec::new();
for _ in 0..kv_len {
values_placeholders.push(format!("(${0}, ${1})", param_index, param_index + 1));
param_index += 2;
}
let values_clause = values_placeholders.join(", ");
format!(
r#"
WITH prev AS (
SELECT k,v FROM {table_name} WHERE k IN ({in_clause})
), update AS (
INSERT INTO {table_name} (k, v) VALUES
{values_clause}
ON CONFLICT (
k
) DO UPDATE SET
v = excluded.v
)
SELECT k, v FROM prev;
"#
)
}
}
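As a hedged illustration: for a single key-value pair and the default metadata table (assumed here to be `greptime_metakv`), `generate_batch_upsert_query(1)` produces SQL roughly equivalent to the statement below, bound with the key as $1 and the key/value pair again as $2/$3; the `prev` CTE is what allows `prev_kv` responses to be filled in.

    WITH prev AS (
        SELECT k,v FROM greptime_metakv WHERE k IN ($1)
    ), update AS (
        INSERT INTO greptime_metakv (k, v) VALUES
        ($2, $3)
        ON CONFLICT (k) DO UPDATE SET v = excluded.v
    )
    SELECT k, v FROM prev;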
#[async_trait::async_trait]
impl Executor for PgClient {
type Transaction<'a>
= PgTxnClient<'a>
where
Self: 'a;
fn name() -> &'static str {
"Postgres"
}
async fn query(&mut self, query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>> {
let params: Vec<&(dyn ToSql + Sync)> = params.iter().map(|p| p as _).collect();
let stmt = self
.0
.prepare_cached(query)
.await
.context(PostgresExecutionSnafu { sql: query })?;
let rows = self
.0
.query(&stmt, &params)
.await
.context(PostgresExecutionSnafu { sql: query })?;
Ok(rows.into_iter().map(key_value_from_row).collect())
}
async fn txn_executor<'a>(&'a mut self) -> Result<Self::Transaction<'a>> {
let txn = self
.0
.build_transaction()
.isolation_level(IsolationLevel::Serializable)
.start()
.await
.context(PostgresTransactionSnafu {
operation: "begin".to_string(),
})?;
Ok(PgTxnClient(txn))
}
}
#[async_trait::async_trait]
impl<'a> Transaction<'a> for PgTxnClient<'a> {
async fn query(&mut self, query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>> {
let params: Vec<&(dyn ToSql + Sync)> = params.iter().map(|p| p as _).collect();
let stmt = self
.0
.prepare_cached(query)
.await
.context(PostgresExecutionSnafu { sql: query })?;
let rows = self
.0
.query(&stmt, &params)
.await
.context(PostgresExecutionSnafu { sql: query })?;
Ok(rows.into_iter().map(key_value_from_row).collect())
}
async fn commit(self) -> Result<()> {
self.0.commit().await.context(PostgresTransactionSnafu {
operation: "commit",
})?;
Ok(())
}
}
pub struct PgExecutorFactory {
pool: Pool,
}
impl PgExecutorFactory {
async fn client(&self) -> Result<PgClient> {
match self.pool.get().await {
Ok(client) => Ok(PgClient(client)),
Err(e) => GetPostgresConnectionSnafu {
reason: e.to_string(),
}
.fail(),
}
}
}
#[async_trait::async_trait]
impl ExecutorFactory<PgClient> for PgExecutorFactory {
async fn default_executor(&self) -> Result<PgClient> {
self.client().await
}
async fn txn_executor<'a>(
&self,
default_executor: &'a mut PgClient,
) -> Result<PgTxnClient<'a>> {
default_executor.txn_executor().await
}
}
/// A PostgreSQL-backed key-value store for metasrv.
/// It uses [deadpool_postgres::Pool] as the connection pool for [RdsStore].
pub type PgStore = RdsStore<PgClient, PgExecutorFactory, PgSqlTemplateSet>;
#[async_trait::async_trait]
impl KvQueryExecutor<PgClient> for PgStore {
async fn range_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, PgClient>,
req: RangeRequest,
) -> Result<RangeResponse> {
let template_type = range_template(&req.key, &req.range_end);
let template = self.sql_template_set.range_template.get(template_type);
let params = template_type.build_params(req.key, req.range_end);
let params_ref = params.iter().collect::<Vec<_>>();
// Always add 1 to limit to check if there is more data
let query =
RangeTemplate::with_limit(template, if req.limit == 0 { 0 } else { req.limit + 1 });
let limit = req.limit as usize;
debug!("query: {:?}, params: {:?}", query, params);
let mut kvs = query_executor.query(&query, &params_ref).await?;
if req.keys_only {
kvs.iter_mut().for_each(|kv| kv.value = vec![]);
}
// If limit is 0, we always return all data
if limit == 0 || kvs.len() <= limit {
return Ok(RangeResponse { kvs, more: false });
}
// Otherwise the query returned limit + 1 rows, so drop the extra row and set `more` to true
let removed = kvs.pop();
debug_assert!(removed.is_some());
Ok(RangeResponse { kvs, more: true })
}
async fn batch_put_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, PgClient>,
req: BatchPutRequest,
) -> Result<BatchPutResponse> {
let mut in_params = Vec::with_capacity(req.kvs.len() * 3);
let mut values_params = Vec::with_capacity(req.kvs.len() * 2);
for kv in &req.kvs {
let processed_key = &kv.key;
in_params.push(processed_key);
let processed_value = &kv.value;
values_params.push(processed_key);
values_params.push(processed_value);
}
in_params.extend(values_params);
let params = in_params.iter().map(|x| x as _).collect::<Vec<_>>();
let query = self
.sql_template_set
.generate_batch_upsert_query(req.kvs.len());
let kvs = query_executor.query(&query, &params).await?;
if req.prev_kv {
Ok(BatchPutResponse { prev_kvs: kvs })
} else {
Ok(BatchPutResponse::default())
}
}
/// Batch get with a given executor. Needed when the executor runs inside a transaction.
async fn batch_get_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, PgClient>,
req: BatchGetRequest,
) -> Result<BatchGetResponse> {
if req.keys.is_empty() {
return Ok(BatchGetResponse { kvs: vec![] });
}
let query = self
.sql_template_set
.generate_batch_get_query(req.keys.len());
let params = req.keys.iter().map(|x| x as _).collect::<Vec<_>>();
let kvs = query_executor.query(&query, &params).await?;
Ok(BatchGetResponse { kvs })
}
async fn delete_range_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, PgClient>,
req: DeleteRangeRequest,
) -> Result<DeleteRangeResponse> {
let template_type = range_template(&req.key, &req.range_end);
let template = self.sql_template_set.delete_template.get(template_type);
let params = template_type.build_params(req.key, req.range_end);
let params_ref = params.iter().map(|x| x as _).collect::<Vec<_>>();
let kvs = query_executor.query(template, &params_ref).await?;
let mut resp = DeleteRangeResponse::new(kvs.len() as i64);
if req.prev_kv {
resp.with_prev_kvs(kvs);
}
Ok(resp)
}
async fn batch_delete_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, PgClient>,
req: BatchDeleteRequest,
) -> Result<BatchDeleteResponse> {
if req.keys.is_empty() {
return Ok(BatchDeleteResponse::default());
}
let query = self
.sql_template_set
.generate_batch_delete_query(req.keys.len());
let params = req.keys.iter().map(|x| x as _).collect::<Vec<_>>();
let kvs = query_executor.query(&query, &params).await?;
if req.prev_kv {
Ok(BatchDeleteResponse { prev_kvs: kvs })
} else {
Ok(BatchDeleteResponse::default())
}
}
}
impl PgStore {
/// Create [PgStore] impl of [KvBackendRef] from url.
pub async fn with_url(url: &str, table_name: &str, max_txn_ops: usize) -> Result<KvBackendRef> {
let mut cfg = Config::new();
cfg.url = Some(url.to_string());
// TODO(weny, CookiePie): add tls support
let pool = cfg
.create_pool(Some(Runtime::Tokio1), NoTls)
.context(CreatePostgresPoolSnafu)?;
Self::with_pg_pool(pool, table_name, max_txn_ops).await
}
/// Create [PgStore] impl of [KvBackendRef] from [deadpool_postgres::Pool].
pub async fn with_pg_pool(
pool: Pool,
table_name: &str,
max_txn_ops: usize,
) -> Result<KvBackendRef> {
// This step ensures the postgres metadata backend is ready to use.
// We check whether the metadata table (`greptime_metakv` by default) exists
// and create it if it does not.
let client = match pool.get().await {
Ok(client) => client,
Err(e) => {
return GetPostgresConnectionSnafu {
reason: e.to_string(),
}
.fail();
}
};
let template_factory = PgSqlTemplateFactory::new(table_name);
let sql_template_set = template_factory.build();
client
.execute(&sql_template_set.create_table_statement, &[])
.await
.with_context(|_| PostgresExecutionSnafu {
sql: sql_template_set.create_table_statement.to_string(),
})?;
Ok(Arc::new(Self {
max_txn_ops,
sql_template_set,
txn_retry_count: RDS_STORE_TXN_RETRY_COUNT,
executor_factory: PgExecutorFactory { pool },
_phantom: PhantomData,
}))
}
}
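A hedged construction sketch (the connection string is a placeholder; `greptime_metakv` and the 128-operation transaction limit mirror values used elsewhere in this diff):

    let kv_backend = PgStore::with_url(
        "postgres://user:password@127.0.0.1:5432/metadata",
        "greptime_metakv",
        128,
    )
    .await?;
    // `kv_backend` is a KvBackendRef and can back the metasrv metadata layer directly.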
#[cfg(test)]
mod tests {
use super::*;
use crate::kv_backend::test::{
prepare_kv_with_prefix, test_kv_batch_delete_with_prefix, test_kv_batch_get_with_prefix,
test_kv_compare_and_put_with_prefix, test_kv_delete_range_with_prefix,
test_kv_put_with_prefix, test_kv_range_2_with_prefix, test_kv_range_with_prefix,
test_txn_compare_equal, test_txn_compare_greater, test_txn_compare_less,
test_txn_compare_not_equal, test_txn_one_compare_op, text_txn_multi_compare_op,
unprepare_kv,
};
async fn build_pg_kv_backend(table_name: &str) -> Option<PgStore> {
let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap_or_default();
if endpoints.is_empty() {
return None;
}
let mut cfg = Config::new();
cfg.url = Some(endpoints);
let pool = cfg
.create_pool(Some(Runtime::Tokio1), NoTls)
.context(CreatePostgresPoolSnafu)
.unwrap();
let client = pool.get().await.unwrap();
let template_factory = PgSqlTemplateFactory::new(table_name);
let sql_templates = template_factory.build();
client
.execute(&sql_templates.create_table_statement, &[])
.await
.context(PostgresExecutionSnafu {
sql: sql_templates.create_table_statement.to_string(),
})
.unwrap();
Some(PgStore {
max_txn_ops: 128,
sql_template_set: sql_templates,
txn_retry_count: RDS_STORE_TXN_RETRY_COUNT,
executor_factory: PgExecutorFactory { pool },
_phantom: PhantomData,
})
}
#[tokio::test]
async fn test_pg_put() {
let kv_backend = build_pg_kv_backend("put_test").await.unwrap();
let prefix = b"put/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_put_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_pg_range() {
let kv_backend = build_pg_kv_backend("range_test").await.unwrap();
let prefix = b"range/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_range_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_pg_range_2() {
let kv_backend = build_pg_kv_backend("range2_test").await.unwrap();
let prefix = b"range2/";
test_kv_range_2_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_pg_batch_get() {
let kv_backend = build_pg_kv_backend("batch_get_test").await.unwrap();
let prefix = b"batch_get/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_batch_get_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_pg_batch_delete() {
let kv_backend = build_pg_kv_backend("batch_delete_test").await.unwrap();
let prefix = b"batch_delete/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_pg_batch_delete_with_prefix() {
let kv_backend = build_pg_kv_backend("batch_delete_with_prefix_test")
.await
.unwrap();
let prefix = b"batch_delete/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_batch_delete_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_pg_delete_range() {
let kv_backend = build_pg_kv_backend("delete_range_test").await.unwrap();
let prefix = b"delete_range/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_pg_compare_and_put() {
let kv_backend = build_pg_kv_backend("compare_and_put_test").await.unwrap();
let prefix = b"compare_and_put/";
let kv_backend = Arc::new(kv_backend);
test_kv_compare_and_put_with_prefix(kv_backend.clone(), prefix.to_vec()).await;
}
#[tokio::test]
async fn test_pg_txn() {
let kv_backend = build_pg_kv_backend("txn_test").await.unwrap();
test_txn_one_compare_op(&kv_backend).await;
text_txn_multi_compare_op(&kv_backend).await;
test_txn_compare_equal(&kv_backend).await;
test_txn_compare_greater(&kv_backend).await;
test_txn_compare_less(&kv_backend).await;
test_txn_compare_not_equal(&kv_backend).await;
}
}

View File

@@ -34,6 +34,7 @@ pub mod kv_backend;
pub mod leadership_notifier;
pub mod lock_key;
pub mod metrics;
pub mod node_expiry_listener;
pub mod node_manager;
pub mod peer;
pub mod range_stream;

View File

@@ -0,0 +1,152 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Mutex;
use std::time::Duration;
use common_telemetry::{debug, error, info, warn};
use tokio::task::JoinHandle;
use tokio::time::{interval, MissedTickBehavior};
use crate::cluster::{NodeInfo, NodeInfoKey};
use crate::error;
use crate::kv_backend::ResettableKvBackendRef;
use crate::leadership_notifier::LeadershipChangeListener;
use crate::rpc::store::RangeRequest;
use crate::rpc::KeyValue;
/// [NodeExpiryListener] periodically checks all node info in memory and removes
/// expired node info to prevent memory leaks.
pub struct NodeExpiryListener {
handle: Mutex<Option<JoinHandle<()>>>,
max_idle_time: Duration,
in_memory: ResettableKvBackendRef,
}
impl Drop for NodeExpiryListener {
fn drop(&mut self) {
self.stop();
}
}
impl NodeExpiryListener {
pub fn new(max_idle_time: Duration, in_memory: ResettableKvBackendRef) -> Self {
Self {
handle: Mutex::new(None),
max_idle_time,
in_memory,
}
}
async fn start(&self) {
let mut handle = self.handle.lock().unwrap();
if handle.is_none() {
let in_memory = self.in_memory.clone();
let max_idle_time = self.max_idle_time;
let ticker_loop = tokio::spawn(async move {
// Run clean task every minute.
let mut interval = interval(Duration::from_secs(60));
interval.set_missed_tick_behavior(MissedTickBehavior::Skip);
loop {
interval.tick().await;
if let Err(e) = Self::clean_expired_nodes(&in_memory, max_idle_time).await {
error!(e; "Failed to clean expired node");
}
}
});
*handle = Some(ticker_loop);
}
}
fn stop(&self) {
if let Some(handle) = self.handle.lock().unwrap().take() {
handle.abort();
info!("Node expiry listener stopped")
}
}
/// Cleans expired nodes from memory.
async fn clean_expired_nodes(
in_memory: &ResettableKvBackendRef,
max_idle_time: Duration,
) -> error::Result<()> {
let node_keys = Self::list_expired_nodes(in_memory, max_idle_time).await?;
for key in node_keys {
let key_bytes: Vec<u8> = (&key).into();
if let Err(e) = in_memory.delete(&key_bytes, false).await {
warn!(e; "Failed to delete expired node: {:?}", key_bytes);
} else {
debug!("Deleted expired node key: {:?}", key);
}
}
Ok(())
}
/// Lists expired nodes that have been inactive for more than `max_idle_time`.
async fn list_expired_nodes(
in_memory: &ResettableKvBackendRef,
max_idle_time: Duration,
) -> error::Result<impl Iterator<Item = NodeInfoKey>> {
let prefix = NodeInfoKey::key_prefix_with_cluster_id(0);
let req = RangeRequest::new().with_prefix(prefix);
let current_time_millis = common_time::util::current_time_millis();
let resp = in_memory.range(req).await?;
Ok(resp
.kvs
.into_iter()
.filter_map(move |KeyValue { key, value }| {
let Ok(info) = NodeInfo::try_from(value).inspect_err(|e| {
warn!(e; "Unrecognized node info value");
}) else {
return None;
};
if (current_time_millis - info.last_activity_ts) > max_idle_time.as_millis() as i64
{
NodeInfoKey::try_from(key)
.inspect_err(|e| {
warn!(e; "Unrecognized node info key: {:?}", info.peer);
})
.ok()
.inspect(|node_key| {
debug!("Found expired node: {:?}", node_key);
})
} else {
None
}
}))
}
}
#[async_trait::async_trait]
impl LeadershipChangeListener for NodeExpiryListener {
fn name(&self) -> &str {
"NodeExpiryListener"
}
async fn on_leader_start(&self) -> error::Result<()> {
self.start().await;
info!(
"On leader start, node expiry listener started with max idle time: {:?}",
self.max_idle_time
);
Ok(())
}
async fn on_leader_stop(&self) -> error::Result<()> {
self.stop();
info!("On leader stop, node expiry listener stopped");
Ok(())
}
}
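A hedged lifecycle sketch (it assumes `LeadershipChangeListener` is in scope and an `in_memory` `ResettableKvBackendRef` handle; the idle window is illustrative, and in practice the hooks are invoked by the leadership notifier rather than called directly):

    let listener = NodeExpiryListener::new(Duration::from_secs(30 * 60), in_memory.clone());
    // On becoming leader, the background clean-up task starts ticking once per minute.
    listener.on_leader_start().await?;
    // On losing leadership, the task is aborted.
    listener.on_leader_stop().await?;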

View File

@@ -1239,6 +1239,7 @@ impl From<QueryContext> for PbQueryContext {
timezone,
extensions,
channel: channel as u32,
snapshot_seqs: None,
}
}
}

Some files were not shown because too many files have changed in this diff