fix: correct inverted_indexed_column_ids behavior (#5586)

* fix: correct `inverted_indexed_column_ids`
* fix: fix unit tests

feat: remove default inverted index for physical table (#5583)
2026-01-04 12:22:55 +00:00 · 2025-02-23 07:17:38 +00:00 · 2025-02-22 06:48:05 +00:00 · 2025-02-22 05:18:26 +00:00 · 2025-02-21 09:27:03 +00:00 · 2025-02-21 07:05:19 +00:00
314 changed files with 15350 additions and 6813 deletions
--- a/.github/actions/build-greptime-images/action.yml
+++ b/.github/actions/build-greptime-images/action.yml
@@ -34,8 +34,8 @@ inputs:
    required: true
  push-latest-tag:
    description: Whether to push the latest tag
-    required: false
-    default: 'true'
+    required: true
+    default: 'false'
 runs:
  using: composite
  steps:
@@ -47,7 +47,11 @@ runs:
        password: ${{ inputs.image-registry-password }}

    - name: Set up qemu for multi-platform builds
-      uses: docker/setup-qemu-action@v2
+      uses: docker/setup-qemu-action@v3
+      with:
+        platforms: linux/amd64,linux/arm64
+        # The latest version will lead to segmentation fault.
+        image: tonistiigi/binfmt:qemu-v7.0.0-28

    - name: Set up buildx
      uses: docker/setup-buildx-action@v2
--- a/.github/actions/build-images/action.yml
+++ b/.github/actions/build-images/action.yml
@@ -22,8 +22,8 @@ inputs:
    required: true
  push-latest-tag:
    description: Whether to push the latest tag
-    required: false
-    default: 'true'
+    required: true
+    default: 'false'
  dev-mode:
    description: Enable dev mode, only build standard greptime
    required: false
--- a/.github/actions/release-cn-artifacts/action.yaml
+++ b/.github/actions/release-cn-artifacts/action.yaml
@@ -51,8 +51,8 @@ inputs:
    required: true
  upload-to-s3:
    description: Upload to S3
-    required: false
-    default: 'true'
+    required: true
+    default: 'false'
  artifacts-dir:
    description: Directory to store artifacts
    required: false
@@ -77,13 +77,21 @@ runs:
      with:
        path: ${{ inputs.artifacts-dir }}

+    - name: Install s5cmd
+      shell: bash
+      run: |
+        wget https://github.com/peak/s5cmd/releases/download/v2.3.0/s5cmd_2.3.0_Linux-64bit.tar.gz
+        tar -xzf s5cmd_2.3.0_Linux-64bit.tar.gz
+        sudo mv s5cmd /usr/local/bin/
+        sudo chmod +x /usr/local/bin/s5cmd
+
    - name: Release artifacts to cn region
      uses: nick-invision/retry@v2
      if: ${{ inputs.upload-to-s3 == 'true' }}
      env:
        AWS_ACCESS_KEY_ID: ${{ inputs.aws-cn-access-key-id }}
        AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-cn-secret-access-key }}
-        AWS_DEFAULT_REGION: ${{ inputs.aws-cn-region }}
+        AWS_REGION: ${{ inputs.aws-cn-region }}
        UPDATE_VERSION_INFO: ${{ inputs.update-version-info }}
      with:
        max_attempts: ${{ inputs.upload-max-retry-times }}
--- a/.github/scripts/upload-artifacts-to-s3.sh
+++ b/.github/scripts/upload-artifacts-to-s3.sh
@@ -33,7 +33,7 @@ function upload_artifacts() {
  #    ├── greptime-darwin-amd64-v0.2.0.sha256sum
  #    └── greptime-darwin-amd64-v0.2.0.tar.gz
  find "$ARTIFACTS_DIR" -type f \( -name "*.tar.gz" -o -name "*.sha256sum" \) | while IFS= read -r file; do
-    aws s3 cp \
+    s5cmd cp \
      "$file" "s3://$AWS_S3_BUCKET/$RELEASE_DIRS/$VERSION/$(basename "$file")"
  done
 }
@@ -45,7 +45,7 @@ function update_version_info() {
    if [[ "$VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
      echo "Updating latest-version.txt"
      echo "$VERSION" > latest-version.txt
-      aws s3 cp \
+      s5cmd cp \
        latest-version.txt "s3://$AWS_S3_BUCKET/$RELEASE_DIRS/latest-version.txt"
    fi

@@ -53,7 +53,7 @@ function update_version_info() {
    if [[ "$VERSION" == *"nightly"* ]]; then
      echo "Updating latest-nightly-version.txt"
      echo "$VERSION" > latest-nightly-version.txt
-      aws s3 cp \
+      s5cmd cp \
        latest-nightly-version.txt "s3://$AWS_S3_BUCKET/$RELEASE_DIRS/latest-nightly-version.txt"
    fi
  fi
--- a/.github/workflows/apidoc.yml
+++ b/.github/workflows/apidoc.yml
@@ -17,6 +17,8 @@ jobs:
    runs-on: ubuntu-20.04
    steps:
    - uses: actions/checkout@v4
+      with:
+        persist-credentials: false
    - uses: arduino/setup-protoc@v3
      with:
        repo-token: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/dependency-check.yml
+++ b/.github/workflows/dependency-check.yml
@@ -12,6 +12,8 @@ jobs:
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
+      with:
+        persist-credentials: false

    - name: Set up Rust
      uses: actions-rust-lang/setup-rust-toolchain@v1
--- a/.github/workflows/dev-build.yml
+++ b/.github/workflows/dev-build.yml
@@ -76,15 +76,9 @@ env:

  NIGHTLY_RELEASE_PREFIX: nightly

-  # Use the different image name to avoid conflict with the release images.
-  IMAGE_NAME: greptimedb-dev
-
  # The source code will check out in the following path: '${WORKING_DIR}/dev/greptime'.
  CHECKOUT_GREPTIMEDB_PATH: dev/greptimedb

-permissions:
-  issues: write
-
 jobs:
  allocate-runners:
    name: Allocate runners
@@ -107,6 +101,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Create version
        id: create-version
@@ -161,6 +156,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Checkout greptimedb
        uses: actions/checkout@v4
@@ -168,6 +164,7 @@ jobs:
          repository: ${{ inputs.repository }}
          ref: ${{ inputs.commit }}
          path: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
+          persist-credentials: true

      - uses: ./.github/actions/build-linux-artifacts
        with:
@@ -192,6 +189,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Checkout greptimedb
        uses: actions/checkout@v4
@@ -199,6 +197,7 @@ jobs:
          repository: ${{ inputs.repository }}
          ref: ${{ inputs.commit }}
          path: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
+          persist-credentials: true

      - uses: ./.github/actions/build-linux-artifacts
        with:
@@ -226,13 +225,14 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Build and push images to dockerhub
        uses: ./.github/actions/build-images
        with:
          image-registry: docker.io
          image-namespace: ${{ vars.IMAGE_NAMESPACE }}
-          image-name: ${{ env.IMAGE_NAME }}
+          image-name: ${{ vars.DEV_BUILD_IMAGE_NAME }}
          image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
          image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
          version: ${{ needs.allocate-runners.outputs.version }}
@@ -257,13 +257,14 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Release artifacts to CN region
        uses: ./.github/actions/release-cn-artifacts
        with:
          src-image-registry: docker.io
          src-image-namespace: ${{ vars.IMAGE_NAMESPACE }}
-          src-image-name: ${{ env.IMAGE_NAME }}
+          src-image-name: ${{ vars.DEV_BUILD_IMAGE_NAME }}
          dst-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
          dst-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
          dst-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
@@ -273,6 +274,7 @@ jobs:
          aws-cn-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
          aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
          aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
+          upload-to-s3: false
          dev-mode: true                     # Only build the standard images(exclude centos images).
          push-latest-tag: false             # Don't push the latest tag to registry.
          update-version-info: false         # Don't update the version info in S3.
@@ -291,6 +293,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Stop EC2 runner
        uses: ./.github/actions/stop-runner
@@ -316,6 +319,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Stop EC2 runner
        uses: ./.github/actions/stop-runner
@@ -334,10 +338,16 @@ jobs:
      release-images-to-dockerhub
    ]
    runs-on: ubuntu-20.04
+    permissions:
+      issues: write
+
    env:
      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
    steps:
      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
      - name: Report CI status
        id: report-ci-status
--- a/.github/workflows/develop.yml
+++ b/.github/workflows/develop.yml
@@ -26,6 +26,8 @@ jobs:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: crate-ci/typos@master
      - name: Check the config docs
        run: |
@@ -38,6 +40,8 @@ jobs:
    name: Check License Header
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: korandoru/hawkeye@v5

  check:
@@ -49,6 +53,8 @@ jobs:
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -70,6 +76,8 @@ jobs:
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: actions-rust-lang/setup-rust-toolchain@v1
      - name: Install taplo
        run: cargo +stable install taplo-cli --version ^0.9 --locked --force
@@ -85,6 +93,8 @@ jobs:
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -139,6 +149,8 @@ jobs:
          echo "Disk space after:"
          df -h
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -192,6 +204,8 @@ jobs:
          echo "Disk space after:"
          df -h
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -238,6 +252,8 @@ jobs:
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -295,6 +311,8 @@ jobs:
          echo "Disk space after:"
          df -h
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - name: Setup Kind
        uses: ./.github/actions/setup-kind
      - if: matrix.mode.minio
@@ -437,6 +455,8 @@ jobs:
          echo "Disk space after:"
          df -h
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - name: Setup Kind
        uses: ./.github/actions/setup-kind
      - name: Setup Chaos Mesh
@@ -562,6 +582,8 @@ jobs:
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - if: matrix.mode.kafka
        name: Setup kafka server
        working-directory: tests-integration/fixtures
@@ -589,6 +611,8 @@ jobs:
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -604,6 +628,8 @@ jobs:
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -626,6 +652,8 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - name: Merge Conflict Finder
        uses: olivernybroe/action-conflict-finder@v4.0

@@ -636,6 +664,8 @@ jobs:
    needs:  [conflict-check, clippy, fmt]
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -684,6 +714,8 @@ jobs:
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/docbot.yml
+++ b/.github/workflows/docbot.yml
@@ -3,16 +3,21 @@ on:
  pull_request_target:
    types: [opened, edited]

-permissions:
-  pull-requests: write
-  contents: read
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true

 jobs:
  docbot:
    runs-on: ubuntu-20.04
+    permissions:
+      pull-requests: write
+      contents: read
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
      - name: Maybe Follow Up Docs Issue
        working-directory: cyborg
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -34,6 +34,8 @@ jobs:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: crate-ci/typos@master

  license-header-check:
@@ -41,6 +43,8 @@ jobs:
    name: Check License Header
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: korandoru/hawkeye@v5

  check:
--- a/.github/workflows/nightly-build.yml
+++ b/.github/workflows/nightly-build.yml
@@ -66,13 +66,6 @@ env:

  NIGHTLY_RELEASE_PREFIX: nightly

-  # Use the different image name to avoid conflict with the release images.
-  # The DockerHub image will be greptime/greptimedb-nightly.
-  IMAGE_NAME: greptimedb-nightly
-
-permissions:
-  issues: write
-
 jobs:
  allocate-runners:
    name: Allocate runners
@@ -95,6 +88,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Create version
        id: create-version
@@ -147,6 +141,7 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - uses: ./.github/actions/build-linux-artifacts
        with:
@@ -168,6 +163,7 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - uses: ./.github/actions/build-linux-artifacts
        with:
@@ -193,17 +189,18 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Build and push images to dockerhub
        uses: ./.github/actions/build-images
        with:
          image-registry: docker.io
          image-namespace: ${{ vars.IMAGE_NAMESPACE }}
-          image-name: ${{ env.IMAGE_NAME }}
+          image-name: ${{ vars.NIGHTLY_BUILD_IMAGE_NAME }}
          image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
          image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
          version: ${{ needs.allocate-runners.outputs.version }}
-          push-latest-tag: true
+          push-latest-tag: false

      - name: Set nightly build result
        id: set-nightly-build-result
@@ -226,13 +223,14 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Release artifacts to CN region
        uses: ./.github/actions/release-cn-artifacts
        with:
          src-image-registry: docker.io
          src-image-namespace: ${{ vars.IMAGE_NAMESPACE }}
-          src-image-name: ${{ env.IMAGE_NAME }}
+          src-image-name: ${{ vars.NIGHTLY_BUILD_IMAGE_NAME }}
          dst-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
          dst-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
          dst-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
@@ -242,9 +240,10 @@ jobs:
          aws-cn-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
          aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
          aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
+          upload-to-s3: false
          dev-mode: false
          update-version-info: false  # Don't update version info in S3.
-          push-latest-tag: true
+          push-latest-tag: false

  stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
    name: Stop linux-amd64 runner
@@ -260,6 +259,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Stop EC2 runner
        uses: ./.github/actions/stop-runner
@@ -285,6 +285,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Stop EC2 runner
        uses: ./.github/actions/stop-runner
@@ -303,10 +304,14 @@ jobs:
      release-images-to-dockerhub
    ]
    runs-on: ubuntu-20.04
+    permissions:
+      issues: write
    env:
      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
      - name: Report CI status
        id: report-ci-status
--- a/.github/workflows/nightly-ci.yml
+++ b/.github/workflows/nightly-ci.yml
@@ -9,9 +9,6 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

-permissions:
-  issues: write
-
 jobs:
  sqlness-test:
    name: Run sqlness test
@@ -22,6 +19,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Check install.sh
        run: ./.github/scripts/check-install-script.sh
@@ -46,9 +44,14 @@ jobs:
    name: Sqlness tests on Windows
    if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
    runs-on: windows-2022-8-cores
+    permissions:
+      issues: write
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
      - uses: arduino/setup-protoc@v3
        with:
@@ -76,6 +79,9 @@ jobs:
    steps:
      - run: git config --global core.autocrlf false
      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
      - uses: arduino/setup-protoc@v3
        with:
@@ -111,9 +117,13 @@ jobs:
  cleanbuild-linux-nix:
    name: Run clean build on Linux
    runs-on: ubuntu-latest
+    if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
      - uses: cachix/install-nix-action@v27
        with:
          nix_path: nixpkgs=channel:nixos-24.11
@@ -141,6 +151,9 @@ jobs:
      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
    steps:
      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
      - name: Report CI status
        id: report-ci-status
--- a/.github/workflows/release-dev-builder-images.yaml
+++ b/.github/workflows/release-dev-builder-images.yaml
@@ -37,6 +37,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Configure build image version
        id: set-version
@@ -85,48 +86,66 @@ jobs:
      - name: Push dev-builder-ubuntu image
        shell: bash
        if: ${{ inputs.release_dev_builder_ubuntu_image }}
+        env:
+          IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
+          IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
+          ECR_IMAGE_REGISTRY: ${{ vars.ECR_IMAGE_REGISTRY }}
+          ECR_IMAGE_NAMESPACE: ${{ vars.ECR_IMAGE_NAMESPACE }}
        run: |
          docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
            -e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
            quay.io/skopeo/stable:latest \
-            copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }} \
-            docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }}
+            copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-ubuntu:$IMAGE_VERSION \
+            docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-ubuntu:$IMAGE_VERSION

          docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
            -e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
            quay.io/skopeo/stable:latest \
-            copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:latest \
-            docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-ubuntu:latest
+            copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-ubuntu:latest \
+            docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-ubuntu:latest
+
      - name: Push dev-builder-centos image
        shell: bash
        if: ${{ inputs.release_dev_builder_centos_image }}
+        env:
+          IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
+          IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
+          ECR_IMAGE_REGISTRY: ${{ vars.ECR_IMAGE_REGISTRY }}
+          ECR_IMAGE_NAMESPACE: ${{ vars.ECR_IMAGE_NAMESPACE }}
        run: |
          docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
            -e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
            quay.io/skopeo/stable:latest \
-            copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }} \
-            docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }}
+            copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-centos:$IMAGE_VERSION \
+            docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-centos:$IMAGE_VERSION

          docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
            -e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
            quay.io/skopeo/stable:latest \
-            copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:latest \
-            docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-centos:latest
+            copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-centos:latest \
+            docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-centos:latest
+
      - name: Push dev-builder-android image
        shell: bash
        if: ${{ inputs.release_dev_builder_android_image }}
+        env:
+          IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
+          IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
+          ECR_IMAGE_REGISTRY: ${{ vars.ECR_IMAGE_REGISTRY }}
+          ECR_IMAGE_NAMESPACE: ${{ vars.ECR_IMAGE_NAMESPACE }}
        run: |
          docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
            -e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
            quay.io/skopeo/stable:latest \
-            copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }} \
-            docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }}
+            copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION \
+            docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION

          docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
            -e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
            quay.io/skopeo/stable:latest \
-            copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:latest \
-            docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-android:latest
+            copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-android:latest \
+            docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-android:latest
+
  release-dev-builder-images-cn: # Note: Be careful issue: https://github.com/containers/skopeo/issues/1874 and we decide to use the latest stable skopeo container.
    name: Release dev builder images to CN region
    runs-on: ubuntu-20.04
@@ -144,29 +163,41 @@ jobs:
      - name: Push dev-builder-ubuntu image
        shell: bash
        if: ${{ inputs.release_dev_builder_ubuntu_image }}
+        env:
+          IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
+          IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
+          ACR_IMAGE_REGISTRY: ${{ vars.ACR_IMAGE_REGISTRY }}
        run: |
          docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
            -e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
            quay.io/skopeo/stable:latest \
-            copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }} \
-            docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }}
+            copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-ubuntu:$IMAGE_VERSION \
+            docker://$ACR_IMAGE_REGISTRY/$IMAGE_NAMESPACE/dev-builder-ubuntu:$IMAGE_VERSION

      - name: Push dev-builder-centos image
        shell: bash
        if: ${{ inputs.release_dev_builder_centos_image }}
+        env:
+          IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
+          IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
+          ACR_IMAGE_REGISTRY: ${{ vars.ACR_IMAGE_REGISTRY }}
        run: |
          docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
            -e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
            quay.io/skopeo/stable:latest \
-            copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }} \
-            docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }}
+            copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-centos:$IMAGE_VERSION \
+            docker://$ACR_IMAGE_REGISTRY/$IMAGE_NAMESPACE/dev-builder-centos:$IMAGE_VERSION

      - name: Push dev-builder-android image
        shell: bash
        if: ${{ inputs.release_dev_builder_android_image }}
+        env:
+          IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
+          IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
+          ACR_IMAGE_REGISTRY: ${{ vars.ACR_IMAGE_REGISTRY }}
        run: |
          docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
            -e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
            quay.io/skopeo/stable:latest \
-            copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }} \
-            docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }}
+            copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION \
+            docker://$ACR_IMAGE_REGISTRY/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -93,11 +93,6 @@ env:
  # Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
  NEXT_RELEASE_VERSION: v0.12.0

-# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
-permissions:
-  issues: write # Allows the action to create issues for cyborg.
-  contents: write # Allows the action to create a release.
-
 jobs:
  allocate-runners:
    name: Allocate runners
@@ -122,6 +117,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Check Rust toolchain version
        shell: bash
@@ -181,6 +177,7 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - uses: ./.github/actions/build-linux-artifacts
        with:
@@ -202,6 +199,7 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - uses: ./.github/actions/build-linux-artifacts
        with:
@@ -237,6 +235,7 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - uses: ./.github/actions/build-macos-artifacts
        with:
@@ -276,6 +275,7 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - uses: ./.github/actions/build-windows-artifacts
        with:
@@ -306,15 +306,18 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Build and push images to dockerhub
        uses: ./.github/actions/build-images
        with:
          image-registry: docker.io
          image-namespace: ${{ vars.IMAGE_NAMESPACE }}
+          image-name: ${{ vars.GREPTIMEDB_IMAGE_NAME }}
          image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
          image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
          version: ${{ needs.allocate-runners.outputs.version }}
+          push-latest-tag: true

      - name: Set build image result
        id: set-build-image-result
@@ -341,13 +344,14 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Release artifacts to CN region
        uses: ./.github/actions/release-cn-artifacts
        with:
          src-image-registry: docker.io
          src-image-namespace: ${{ vars.IMAGE_NAMESPACE }}
-          src-image-name: greptimedb
+          src-image-name: ${{ vars.GREPTIMEDB_IMAGE_NAME }}
          dst-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
          dst-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
          dst-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
@@ -358,6 +362,7 @@ jobs:
          aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
          aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
          dev-mode: false
+          upload-to-s3: true
          update-version-info: true
          push-latest-tag: true

@@ -377,6 +382,7 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Publish GitHub release
        uses: ./.github/actions/publish-github-release
@@ -400,6 +406,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Stop EC2 runner
        uses: ./.github/actions/stop-runner
@@ -425,6 +432,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Stop EC2 runner
        uses: ./.github/actions/stop-runner
@@ -441,8 +449,15 @@ jobs:
    if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }}
    needs: [allocate-runners]
    runs-on: ubuntu-20.04
+    # Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
+    permissions:
+      issues: write # Allows the action to create issues for cyborg.
+      contents: write # Allows the action to create a release.
    steps:
      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
      - name: Bump doc version
        working-directory: cyborg
@@ -461,10 +476,17 @@ jobs:
      build-windows-artifacts,
    ]
    runs-on: ubuntu-20.04
+    # Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
+    permissions:
+      issues: write # Allows the action to create issues for cyborg.
+      contents: write # Allows the action to create a release.
    env:
      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
    steps:
      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
      - name: Report CI status
        id: report-ci-status
--- a/.github/workflows/schedule.yml
+++ b/.github/workflows/schedule.yml
@@ -4,18 +4,20 @@ on:
    - cron: '4 2 * * *'
  workflow_dispatch:

-permissions:
-  contents: read
-  issues: write
-  pull-requests: write

 jobs:
  maintenance:
    name: Periodic Maintenance
    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      issues: write
+      pull-requests: write
    if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
      - name: Do Maintenance
        working-directory: cyborg
--- a/.github/workflows/semantic-pull-request.yml
+++ b/.github/workflows/semantic-pull-request.yml
@@ -1,18 +1,24 @@
 name: "Semantic Pull Request"

 on:
-  pull_request_target:
+  pull_request:
    types:
      - opened
      - reopened
      - edited

+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
 jobs:
  check:
    runs-on: ubuntu-20.04
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      - uses: ./.github/actions/setup-cyborg
      - name: Check Pull Request
        working-directory: cyborg
--- a/AUTHOR.md
+++ b/AUTHOR.md
@@ -3,30 +3,28 @@
 ## Individual Committers (in alphabetical order)

 * [CookiePieWw](https://github.com/CookiePieWw)
-* [KKould](https://github.com/KKould)
-* [NiwakaDev](https://github.com/NiwakaDev)
 * [etolbakov](https://github.com/etolbakov)
 * [irenjj](https://github.com/irenjj)
-* [tisonkun](https://github.com/tisonkun)
+* [KKould](https://github.com/KKould)
 * [Lanqing Yang](https://github.com/lyang24)
+* [NiwakaDev](https://github.com/NiwakaDev)
+* [tisonkun](https://github.com/tisonkun)
+

 ## Team Members (in alphabetical order)

-* [Breeze-P](https://github.com/Breeze-P)
-* [GrepTime](https://github.com/GrepTime)
-* [MichaelScofield](https://github.com/MichaelScofield)
-* [Wenjie0329](https://github.com/Wenjie0329)
-* [WenyXu](https://github.com/WenyXu)
-* [ZonaHex](https://github.com/ZonaHex)
 * [apdong2022](https://github.com/apdong2022)
 * [beryl678](https://github.com/beryl678)
+* [Breeze-P](https://github.com/Breeze-P)
 * [daviderli614](https://github.com/daviderli614)
 * [discord9](https://github.com/discord9)
 * [evenyag](https://github.com/evenyag)
 * [fengjiachun](https://github.com/fengjiachun)
 * [fengys1996](https://github.com/fengys1996)
+* [GrepTime](https://github.com/GrepTime)
 * [holalengyu](https://github.com/holalengyu)
 * [killme2008](https://github.com/killme2008)
+* [MichaelScofield](https://github.com/MichaelScofield)
 * [nicecui](https://github.com/nicecui)
 * [paomian](https://github.com/paomian)
 * [shuiyisong](https://github.com/shuiyisong)
@@ -34,11 +32,14 @@
 * [sunng87](https://github.com/sunng87)
 * [v0y4g3r](https://github.com/v0y4g3r)
 * [waynexia](https://github.com/waynexia)
+* [Wenjie0329](https://github.com/Wenjie0329)
+* [WenyXu](https://github.com/WenyXu)
 * [xtang](https://github.com/xtang)
 * [zhaoyingnan01](https://github.com/zhaoyingnan01)
 * [zhongzc](https://github.com/zhongzc)
+* [ZonaHex](https://github.com/ZonaHex)
 * [zyy17](https://github.com/zyy17)

 ## All Contributors

-[![All Contributors](https://contrib.rocks/image?repo=GreptimeTeam/greptimedb)](https://github.com/GreptimeTeam/greptimedb/graphs/contributors)
+To see the full list of contributors, please visit our [Contributors page](https://github.com/GreptimeTeam/greptimedb/graphs/contributors).
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -313,7 +313,7 @@ dependencies = [
 "arrow-data",
 "arrow-schema",
 "chrono",
- "chrono-tz 0.10.1",
+ "chrono-tz",
 "half",
 "hashbrown 0.15.2",
 "num",
@@ -1053,7 +1053,7 @@ dependencies = [
 "bitflags 2.6.0",
 "cexpr",
 "clang-sys",
- "itertools 0.11.0",
+ "itertools 0.13.0",
 "proc-macro2",
 "quote",
 "regex",
@@ -1508,28 +1508,6 @@ dependencies = [
 "windows-targets 0.52.6",
 ]

-[[package]]
-name = "chrono-tz"
-version = "0.8.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e"
-dependencies = [
- "chrono",
- "chrono-tz-build 0.2.1",
- "phf",
-]
-
-[[package]]
-name = "chrono-tz"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb"
-dependencies = [
- "chrono",
- "chrono-tz-build 0.3.0",
- "phf",
-]
-
 [[package]]
 name = "chrono-tz"
 version = "0.10.1"
@@ -1537,32 +1515,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c6ac4f2c0bf0f44e9161aec9675e1050aa4a530663c4a9e37e108fa948bca9f"
 dependencies = [
 "chrono",
- "chrono-tz-build 0.4.0",
+ "chrono-tz-build",
 "phf",
 ]

-[[package]]
-name = "chrono-tz-build"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f"
-dependencies = [
- "parse-zoneinfo",
- "phf",
- "phf_codegen",
-]
-
-[[package]]
-name = "chrono-tz-build"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1"
-dependencies = [
- "parse-zoneinfo",
- "phf",
- "phf_codegen",
-]
-
 [[package]]
 name = "chrono-tz-build"
 version = "0.4.0"
@@ -2057,6 +2013,7 @@ dependencies = [
 "approx 0.5.1",
 "arc-swap",
 "async-trait",
+ "bincode",
 "common-base",
 "common-catalog",
 "common-error",
@@ -2090,6 +2047,7 @@ dependencies = [
 "store-api",
 "table",
 "tokio",
+ "uddsketch",
 "wkt",
 ]

@@ -2425,7 +2383,7 @@ version = "0.12.0"
 dependencies = [
 "arrow",
 "chrono",
- "chrono-tz 0.8.6",
+ "chrono-tz",
 "common-error",
 "common-macro",
 "humantime",
@@ -3420,6 +3378,7 @@ dependencies = [
 "meta-client",
 "metric-engine",
 "mito2",
+ "num_cpus",
 "object-store",
 "prometheus",
 "prost 0.13.3",
@@ -4240,6 +4199,7 @@ dependencies = [
 "meta-client",
 "nom",
 "num-traits",
+ "num_cpus",
 "operator",
 "partition",
 "pretty_assertions",
@@ -4336,6 +4296,7 @@ dependencies = [
 "common-test-util",
 "common-time",
 "common-version",
+ "datafusion",
 "datafusion-expr",
 "datanode",
 "datatypes",
@@ -4345,6 +4306,7 @@ dependencies = [
 "log-query",
 "log-store",
 "meta-client",
+ "num_cpus",
 "opentelemetry-proto 0.27.0",
 "operator",
 "partition",
@@ -4735,7 +4697,7 @@ dependencies = [
 [[package]]
 name = "greptime-proto"
 version = "0.1.0"
-source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=683e9d10ae7f3dfb8aaabd89082fc600c17e3795#683e9d10ae7f3dfb8aaabd89082fc600c17e3795"
+source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=a25adc8a01340231121646d8f0a29d0e92f45461#a25adc8a01340231121646d8f0a29d0e92f45461"
 dependencies = [
 "prost 0.13.3",
 "serde",
@@ -6268,7 +6230,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
 dependencies = [
 "cfg-if",
- "windows-targets 0.48.5",
+ "windows-targets 0.52.6",
 ]

 [[package]]
@@ -6780,6 +6742,7 @@ version = "0.12.0"
 dependencies = [
 "api",
 "aquamarine",
+ "async-stream",
 "async-trait",
 "base64 0.21.7",
 "common-base",
@@ -6792,6 +6755,7 @@ dependencies = [
 "common-time",
 "datafusion",
 "datatypes",
+ "futures-util",
 "itertools 0.10.5",
 "lazy_static",
 "mito2",
@@ -7884,7 +7848,7 @@ dependencies = [
 "bytemuck",
 "bytes",
 "chrono",
- "chrono-tz 0.10.1",
+ "chrono-tz",
 "fallible-streaming-iterator",
 "flate2",
 "futures",
@@ -8379,7 +8343,7 @@ dependencies = [
 "async-trait",
 "catalog",
 "chrono",
- "chrono-tz 0.9.0",
+ "chrono-tz",
 "common-catalog",
 "common-error",
 "common-function",
@@ -8797,8 +8761,7 @@ dependencies = [
 [[package]]
 name = "promql-parser"
 version = "0.4.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fe99e6f80a79abccf1e8fb48dd63473a36057e600cc6ea36147c8318698ae6f"
+source = "git+https://github.com/GreptimeTeam/promql-parser.git?rev=27abb8e16003a50c720f00d6c85f41f5fa2a2a8e#27abb8e16003a50c720f00d6c85f41f5fa2a2a8e"
 dependencies = [
 "cfgrammar",
 "chrono",
@@ -8869,7 +8832,7 @@ checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15"
 dependencies = [
 "bytes",
 "heck 0.5.0",
- "itertools 0.11.0",
+ "itertools 0.13.0",
 "log",
 "multimap",
 "once_cell",
@@ -8915,7 +8878,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5"
 dependencies = [
 "anyhow",
- "itertools 0.11.0",
+ "itertools 0.13.0",
 "proc-macro2",
 "quote",
 "syn 2.0.96",
@@ -9129,6 +9092,7 @@ dependencies = [
 "table",
 "tokio",
 "tokio-stream",
+ "unescaper",
 "uuid",
 ]

@@ -10561,6 +10525,7 @@ dependencies = [
 "session",
 "snafu 0.8.5",
 "snap",
+ "socket2",
 "sql",
 "store-api",
 "strum 0.25.0",
@@ -10930,12 +10895,12 @@ dependencies = [
 [[package]]
 name = "sqlness"
 version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "308a7338f2211813d6e9da117e9b9b7aee5d072872d11a934002fd2bd4ab5276"
+source = "git+https://github.com/CeresDB/sqlness.git?rev=bb91f31ff58993e07ea89845791235138283a24c#bb91f31ff58993e07ea89845791235138283a24c"
 dependencies = [
 "async-trait",
 "derive_builder 0.11.2",
 "duration-str",
+ "futures",
 "minijinja",
 "prettydiff",
 "regex",
@@ -10961,6 +10926,7 @@ dependencies = [
 "hex",
 "local-ip-address",
 "mysql",
+ "num_cpus",
 "reqwest",
 "serde",
 "serde_json",
@@ -12993,6 +12959,23 @@ version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"

+[[package]]
+name = "uddsketch"
+version = "0.1.0"
+source = "git+https://github.com/GreptimeTeam/timescaledb-toolkit.git?rev=84828fe8fb494a6a61412a3da96517fc80f7bb20#84828fe8fb494a6a61412a3da96517fc80f7bb20"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "unescaper"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c878a167baa8afd137494101a688ef8c67125089ff2249284bd2b5f9bfedb815"
+dependencies = [
+ "thiserror 1.0.64",
+]
+
 [[package]]
 name = "unicase"
 version = "2.7.0"
@@ -13409,7 +13392,7 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
 dependencies = [
- "windows-sys 0.48.0",
+ "windows-sys 0.59.0",
 ]

 [[package]]
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -81,6 +81,7 @@ rust.unknown_lints = "deny"
 rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }

 [workspace.dependencies]
+# DO_NOT_REMOVE_THIS: BEGIN_OF_EXTERNAL_DEPENDENCIES
 # We turn off default-features for some dependencies here so the workspaces which inherit them can
 # selectively turn them on if needed, since we can override default-features = true (from false)
 # for the inherited dependency but cannot do the reverse (override from true to false).
@@ -106,6 +107,7 @@ bitflags = "2.4.1"
 bytemuck = "1.12"
 bytes = { version = "1.7", features = ["serde"] }
 chrono = { version = "0.4", features = ["serde"] }
+chrono-tz = "0.10.1"
 clap = { version = "4.4", features = ["derive"] }
 config = "0.13.0"
 crossbeam-utils = "0.8"
@@ -127,7 +129,7 @@ etcd-client = "0.14"
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "683e9d10ae7f3dfb8aaabd89082fc600c17e3795" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "a25adc8a01340231121646d8f0a29d0e92f45461" }
 hex = "0.4"
 http = "1"
 humantime = "2.1"
@@ -158,7 +160,9 @@ parquet = { version = "53.0.0", default-features = false, features = ["arrow", "
 paste = "1.0"
 pin-project = "1.0"
 prometheus = { version = "0.13.3", features = ["process"] }
-promql-parser = { version = "0.4.3", features = ["ser"] }
+promql-parser = { git = "https://github.com/GreptimeTeam/promql-parser.git", features = [
+    "ser",
+], rev = "27abb8e16003a50c720f00d6c85f41f5fa2a2a8e" }
 prost = "0.13"
 raft-engine = { version = "0.4.1", default-features = false }
 rand = "0.8"
@@ -207,6 +211,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"]
 typetag = "0.2"
 uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
 zstd = "0.13"
+# DO_NOT_REMOVE_THIS: END_OF_EXTERNAL_DEPENDENCIES

 ## workspaces members
 api = { path = "src/api" }
--- a/config/config.md
+++ b/config/config.md
@@ -40,6 +40,7 @@
 | `mysql.enable` | Bool | `true` | Whether to enable. |
 | `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
 | `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
+| `mysql.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
 | `mysql.tls` | -- | -- | -- |
 | `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
 | `mysql.tls.cert_path` | String | Unset | Certificate file path. |
@@ -49,6 +50,7 @@
 | `postgres.enable` | Bool | `true` | Whether to enable |
 | `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgresSQL server. |
 | `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
+| `postgres.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
 | `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql.tls` section. |
 | `postgres.tls.mode` | String | `disable` | TLS mode. |
 | `postgres.tls.cert_path` | String | Unset | Certificate file path. |
@@ -58,6 +60,8 @@
 | `opentsdb.enable` | Bool | `true` | Whether to enable OpenTSDB put in HTTP API. |
 | `influxdb` | -- | -- | InfluxDB protocol options. |
 | `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
+| `jaeger` | -- | -- | Jaeger protocol options. |
+| `jaeger.enable` | Bool | `true` | Whether to enable Jaeger protocol in HTTP API. |
 | `prom_store` | -- | -- | Prometheus remote storage options |
 | `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
 | `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
@@ -148,6 +152,7 @@
 | `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
 | `region_engine.mito.index.aux_path` | String | `""` | Auxiliary directory path for the index in filesystem, used to store intermediate files for<br/>creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.<br/>The default name for this directory is `index_intermediate` for backward compatibility.<br/><br/>This path contains two subdirectories:<br/>- `__intm`: for storing intermediate files used during creating index.<br/>- `staging`: for storing staging files used during searching index. |
 | `region_engine.mito.index.staging_size` | String | `2GB` | The max capacity of the staging directory. |
+| `region_engine.mito.index.staging_ttl` | String | `7d` | The TTL of the staging directory.<br/>Defaults to 7 days.<br/>Set it to "0s" to disable TTL. |
 | `region_engine.mito.index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
 | `region_engine.mito.index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
 | `region_engine.mito.index.content_cache_page_size` | String | `64KiB` | Page size for inverted index content cache. |
@@ -234,6 +239,7 @@
 | `mysql.enable` | Bool | `true` | Whether to enable. |
 | `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
 | `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
+| `mysql.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
 | `mysql.tls` | -- | -- | -- |
 | `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
 | `mysql.tls.cert_path` | String | Unset | Certificate file path. |
@@ -243,6 +249,7 @@
 | `postgres.enable` | Bool | `true` | Whether to enable |
 | `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgresSQL server. |
 | `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
+| `postgres.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
 | `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql.tls` section. |
 | `postgres.tls.mode` | String | `disable` | TLS mode. |
 | `postgres.tls.cert_path` | String | Unset | Certificate file path. |
@@ -252,6 +259,8 @@
 | `opentsdb.enable` | Bool | `true` | Whether to enable OpenTSDB put in HTTP API. |
 | `influxdb` | -- | -- | InfluxDB protocol options. |
 | `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
+| `jaeger` | -- | -- | Jaeger protocol options. |
+| `jaeger.enable` | Bool | `true` | Whether to enable Jaeger protocol in HTTP API. |
 | `prom_store` | -- | -- | Prometheus remote storage options |
 | `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
 | `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
@@ -483,6 +492,7 @@
 | `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
 | `region_engine.mito.index.aux_path` | String | `""` | Auxiliary directory path for the index in filesystem, used to store intermediate files for<br/>creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.<br/>The default name for this directory is `index_intermediate` for backward compatibility.<br/><br/>This path contains two subdirectories:<br/>- `__intm`: for storing intermediate files used during creating index.<br/>- `staging`: for storing staging files used during searching index. |
 | `region_engine.mito.index.staging_size` | String | `2GB` | The max capacity of the staging directory. |
+| `region_engine.mito.index.staging_ttl` | String | `7d` | The TTL of the staging directory.<br/>Defaults to 7 days.<br/>Set it to "0s" to disable TTL. |
 | `region_engine.mito.index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
 | `region_engine.mito.index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
 | `region_engine.mito.index.content_cache_page_size` | String | `64KiB` | Page size for inverted index content cache. |
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -497,6 +497,11 @@ aux_path = ""
 ## The max capacity of the staging directory.
 staging_size = "2GB"

+## The TTL of the staging directory.
+## Defaults to 7 days.
+## Set it to "0s" to disable TTL.
+staging_ttl = "7d"
+
 ## Cache size for inverted index metadata.
 metadata_cache_size = "64MiB"

--- a/config/frontend.example.toml
+++ b/config/frontend.example.toml
@@ -74,6 +74,9 @@ enable = true
 addr = "127.0.0.1:4002"
 ## The number of server worker threads.
 runtime_size = 2
+## Server-side keep-alive time.
+## Set to 0 (default) to disable.
+keep_alive = "0s"

 # MySQL server TLS options.
 [mysql.tls]
@@ -105,6 +108,9 @@ enable = true
 addr = "127.0.0.1:4003"
 ## The number of server worker threads.
 runtime_size = 2
+## Server-side keep-alive time.
+## Set to 0 (default) to disable.
+keep_alive = "0s"

 ## PostgresSQL server TLS options, see `mysql.tls` section.
 [postgres.tls]
@@ -132,6 +138,11 @@ enable = true
 ## Whether to enable InfluxDB protocol in HTTP API.
 enable = true

+## Jaeger protocol options.
+[jaeger]
+## Whether to enable Jaeger protocol in HTTP API.
+enable = true
+
 ## Prometheus remote storage options
 [prom_store]
 ## Whether to enable Prometheus remote write and read in HTTP API.
--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -78,6 +78,9 @@ enable = true
 addr = "127.0.0.1:4002"
 ## The number of server worker threads.
 runtime_size = 2
+## Server-side keep-alive time.
+## Set to 0 (default) to disable.
+keep_alive = "0s"

 # MySQL server TLS options.
 [mysql.tls]
@@ -109,6 +112,9 @@ enable = true
 addr = "127.0.0.1:4003"
 ## The number of server worker threads.
 runtime_size = 2
+## Server-side keep-alive time.
+## Set to 0 (default) to disable.
+keep_alive = "0s"

 ## PostgresSQL server TLS options, see `mysql.tls` section.
 [postgres.tls]
@@ -136,6 +142,11 @@ enable = true
 ## Whether to enable InfluxDB protocol in HTTP API.
 enable = true

+## Jaeger protocol options.
+[jaeger]
+## Whether to enable Jaeger protocol in HTTP API.
+enable = true
+
 ## Prometheus remote storage options
 [prom_store]
 ## Whether to enable Prometheus remote write and read in HTTP API.
@@ -573,6 +584,11 @@ aux_path = ""
 ## The max capacity of the staging directory.
 staging_size = "2GB"

+## The TTL of the staging directory.
+## Defaults to 7 days.
+## Set it to "0s" to disable TTL.
+staging_ttl = "7d"
+
 ## Cache size for inverted index metadata.
 metadata_cache_size = "64MiB"

--- a/docs/logo-text-padding-dark.png
+++ b/docs/logo-text-padding-dark.png
--- a/docs/logo-text-padding.png
+++ b/docs/logo-text-padding.png
--- a/grafana/greptimedb-cluster.json
+++ b/grafana/greptimedb-cluster.json
--- a/grafana/greptimedb.json
+++ b/grafana/greptimedb.json
@@ -384,8 +384,8 @@
        "rowHeight": 0.9,
        "showValue": "auto",
        "tooltip": {
-          "mode": "none",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -483,8 +483,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "pluginVersion": "10.2.3",
@@ -578,8 +578,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "pluginVersion": "10.2.3",
@@ -601,7 +601,7 @@
      "type": "timeseries"
    },
    {
-      "collapsed": true,
+      "collapsed": false,
      "gridPos": {
        "h": 1,
        "w": 24,
@@ -684,8 +684,8 @@
              "showLegend": true
            },
            "tooltip": {
-              "mode": "single",
-              "sort": "none"
+              "mode": "multi",
+              "sort": "desc"
            }
          },
          "targets": [
@@ -878,8 +878,8 @@
              "showLegend": true
            },
            "tooltip": {
-              "mode": "single",
-              "sort": "none"
+              "mode": "multi",
+              "sort": "desc"
            }
          },
          "targets": [
@@ -1124,8 +1124,8 @@
              "showLegend": true
            },
            "tooltip": {
-              "mode": "single",
-              "sort": "none"
+              "mode": "multi",
+              "sort": "desc"
            }
          },
          "targets": [
@@ -1223,8 +1223,8 @@
              "showLegend": true
            },
            "tooltip": {
-              "mode": "single",
-              "sort": "none"
+              "mode": "multi",
+              "sort": "desc"
            }
          },
          "targets": [
@@ -1322,8 +1322,8 @@
              "showLegend": true
            },
            "tooltip": {
-              "mode": "single",
-              "sort": "none"
+              "mode": "multi",
+              "sort": "desc"
            }
          },
          "targets": [
@@ -1456,8 +1456,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -1573,8 +1573,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -1673,8 +1673,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -1773,8 +1773,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -1890,8 +1890,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -2002,8 +2002,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -2120,8 +2120,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -2233,8 +2233,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -2334,8 +2334,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -2435,8 +2435,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -2548,8 +2548,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -2661,8 +2661,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -2788,8 +2788,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -2889,8 +2889,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -2990,8 +2990,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -3091,8 +3091,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -3191,8 +3191,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -3302,8 +3302,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -3432,8 +3432,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -3543,8 +3543,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -3657,8 +3657,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -3808,8 +3808,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -3909,8 +3909,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -4011,8 +4011,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
@@ -4113,8 +4113,8 @@
          "showLegend": true
        },
        "tooltip": {
-          "mode": "single",
-          "sort": "none"
+          "mode": "multi",
+          "sort": "desc"
        }
      },
      "targets": [
--- a/src/api/Cargo.toml
+++ b/src/api/Cargo.toml
@@ -15,13 +15,10 @@ common-macro.workspace = true
 common-time.workspace = true
 datatypes.workspace = true
 greptime-proto.workspace = true
-paste = "1.0"
+paste.workspace = true
 prost.workspace = true
 serde_json.workspace = true
 snafu.workspace = true

 [build-dependencies]
 tonic-build = "0.11"
-
-[dev-dependencies]
-paste = "1.0"
--- a/src/api/src/v1/column_def.rs
+++ b/src/api/src/v1/column_def.rs
@@ -15,10 +15,10 @@
 use std::collections::HashMap;

 use datatypes::schema::{
-    ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, COMMENT_KEY,
-    FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY,
+    ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, SkippingIndexType,
+    COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY,
 };
-use greptime_proto::v1::Analyzer;
+use greptime_proto::v1::{Analyzer, SkippingIndexType as PbSkippingIndexType};
 use snafu::ResultExt;

 use crate::error::{self, Result};
@@ -121,6 +121,13 @@ pub fn as_fulltext_option(analyzer: Analyzer) -> FulltextAnalyzer {
    }
 }

+/// Converts a protobuf skipping index type into the equivalent `SkippingIndexType`.
+pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> SkippingIndexType {
+    match skipping_index_type {
+        PbSkippingIndexType::BloomFilter => SkippingIndexType::BloomFilter,
+    }
+}
+
 #[cfg(test)]
 mod tests {

--- a/src/catalog/Cargo.toml
+++ b/src/catalog/Cargo.toml
@@ -15,7 +15,7 @@ api.workspace = true
 arrow.workspace = true
 arrow-schema.workspace = true
 async-stream.workspace = true
-async-trait = "0.1"
+async-trait.workspace = true
 bytes.workspace = true
 common-catalog.workspace = true
 common-error.workspace = true
@@ -31,7 +31,7 @@ common-version.workspace = true
 dashmap.workspace = true
 datafusion.workspace = true
 datatypes.workspace = true
-futures = "0.3"
+futures.workspace = true
 futures-util.workspace = true
 humantime.workspace = true
 itertools.workspace = true
@@ -39,7 +39,7 @@ lazy_static.workspace = true
 meta-client.workspace = true
 moka = { workspace = true, features = ["future", "sync"] }
 partition.workspace = true
-paste = "1.0"
+paste.workspace = true
 prometheus.workspace = true
 rustc-hash.workspace = true
 serde_json.workspace = true
@@ -49,7 +49,7 @@ sql.workspace = true
 store-api.workspace = true
 table.workspace = true
 tokio.workspace = true
-tokio-stream = "0.1"
+tokio-stream.workspace = true

 [dev-dependencies]
 cache.workspace = true
--- a/src/catalog/src/system_schema/information_schema/key_column_usage.rs
+++ b/src/catalog/src/system_schema/information_schema/key_column_usage.rs
@@ -228,12 +228,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
                let keys = &table_info.meta.primary_key_indices;
                let schema = table.schema();

-                // For compatibility, use primary key columns as inverted index columns.
-                let pk_as_inverted_index = !schema
-                    .column_schemas()
-                    .iter()
-                    .any(|c| c.has_inverted_index_key());
-
                for (idx, column) in schema.column_schemas().iter().enumerate() {
                    let mut constraints = vec![];
                    if column.is_time_index() {
@@ -251,10 +245,6 @@ impl InformationSchemaKeyColumnUsageBuilder {
                    // TODO(dimbtp): foreign key constraint not supported yet
                    if keys.contains(&idx) {
                        constraints.push(PRI_CONSTRAINT_NAME);
-
-                        if pk_as_inverted_index {
-                            constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);
-                        }
                    }
                    if column.is_inverted_indexed() {
                        constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);
--- a/src/cli/src/bench.rs
+++ b/src/cli/src/bench.rs
@@ -24,7 +24,7 @@ use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
 use common_meta::kv_backend::etcd::EtcdStore;
 use common_meta::kv_backend::memory::MemoryKvBackend;
 #[cfg(feature = "pg_kvbackend")]
-use common_meta::kv_backend::postgres::PgStore;
+use common_meta::kv_backend::rds::PgStore;
 use common_meta::peer::Peer;
 use common_meta::rpc::router::{Region, RegionRoute};
 use common_telemetry::info;
--- a/src/cmd/src/metasrv.rs
+++ b/src/cmd/src/metasrv.rs
@@ -42,7 +42,7 @@ pub struct Instance {
 }

 impl Instance {
-    fn new(instance: MetasrvInstance, guard: Vec<WorkerGuard>) -> Self {
+    pub fn new(instance: MetasrvInstance, guard: Vec<WorkerGuard>) -> Self {
        Self {
            instance,
            _guard: guard,
--- a/src/cmd/src/standalone.rs
+++ b/src/cmd/src/standalone.rs
@@ -60,7 +60,8 @@ use frontend::instance::builder::FrontendBuilder;
 use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
 use frontend::server::Services;
 use frontend::service_config::{
-    InfluxdbOptions, MysqlOptions, OpentsdbOptions, PostgresOptions, PromStoreOptions,
+    InfluxdbOptions, JaegerOptions, MysqlOptions, OpentsdbOptions, PostgresOptions,
+    PromStoreOptions,
 };
 use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
 use mito2::config::MitoConfig;
@@ -140,6 +141,7 @@ pub struct StandaloneOptions {
    pub postgres: PostgresOptions,
    pub opentsdb: OpentsdbOptions,
    pub influxdb: InfluxdbOptions,
+    pub jaeger: JaegerOptions,
    pub prom_store: PromStoreOptions,
    pub wal: DatanodeWalConfig,
    pub storage: StorageConfig,
@@ -169,6 +171,7 @@ impl Default for StandaloneOptions {
            postgres: PostgresOptions::default(),
            opentsdb: OpentsdbOptions::default(),
            influxdb: InfluxdbOptions::default(),
+            jaeger: JaegerOptions::default(),
            prom_store: PromStoreOptions::default(),
            wal: DatanodeWalConfig::default(),
            storage: StorageConfig::default(),
@@ -217,6 +220,7 @@ impl StandaloneOptions {
            postgres: cloned_opts.postgres,
            opentsdb: cloned_opts.opentsdb,
            influxdb: cloned_opts.influxdb,
+            jaeger: cloned_opts.jaeger,
            prom_store: cloned_opts.prom_store,
            meta_client: None,
            logging: cloned_opts.logging,
--- a/src/common/base/Cargo.toml
+++ b/src/common/base/Cargo.toml
@@ -18,7 +18,7 @@ bytes.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
 futures.workspace = true
-paste = "1.0"
+paste.workspace = true
 pin-project.workspace = true
 rand.workspace = true
 serde = { version = "1.0", features = ["derive"] }
--- a/src/common/datasource/Cargo.toml
+++ b/src/common/datasource/Cargo.toml
@@ -35,7 +35,7 @@ orc-rust = { version = "0.5", default-features = false, features = [
    "async",
 ] }
 parquet.workspace = true
-paste = "1.0"
+paste.workspace = true
 rand.workspace = true
 regex = "1.7"
 serde.workspace = true
--- a/src/common/function/Cargo.toml
+++ b/src/common/function/Cargo.toml
@@ -15,6 +15,7 @@ geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"]
 api.workspace = true
 arc-swap = "1.0"
 async-trait.workspace = true
+bincode = "1.3"
 common-base.workspace = true
 common-catalog.workspace = true
 common-error.workspace = true
@@ -37,7 +38,7 @@ nalgebra.workspace = true
 num = "0.4"
 num-traits = "0.2"
 once_cell.workspace = true
-paste = "1.0"
+paste.workspace = true
 s2 = { version = "0.0.12", optional = true }
 serde.workspace = true
 serde_json.workspace = true
@@ -47,6 +48,7 @@ sql.workspace = true
 statrs = "0.16"
 store-api.workspace = true
 table.workspace = true
+uddsketch = { git = "https://github.com/GreptimeTeam/timescaledb-toolkit.git", rev = "84828fe8fb494a6a61412a3da96517fc80f7bb20" }
 wkt = { version = "0.11", optional = true }

 [dev-dependencies]
--- a/src/common/function/src/aggr.rs
+++ b/src/common/function/src/aggr.rs
@@ -0,0 +1,17 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod uddsketch_state;
+
+pub use uddsketch_state::{UddSketchState, UDDSKETCH_STATE_NAME};
--- a/src/common/function/src/aggr/uddsketch_state.rs
+++ b/src/common/function/src/aggr/uddsketch_state.rs
@@ -0,0 +1,307 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use common_query::prelude::*;
+use common_telemetry::trace;
+use datafusion::common::cast::{as_binary_array, as_primitive_array};
+use datafusion::common::not_impl_err;
+use datafusion::error::{DataFusionError, Result as DfResult};
+use datafusion::logical_expr::function::AccumulatorArgs;
+use datafusion::logical_expr::{Accumulator as DfAccumulator, AggregateUDF};
+use datafusion::physical_plan::expressions::Literal;
+use datafusion::prelude::create_udaf;
+use datatypes::arrow::array::ArrayRef;
+use datatypes::arrow::datatypes::{DataType, Float64Type};
+use uddsketch::{SketchHashKey, UDDSketch};
+
+pub const UDDSKETCH_STATE_NAME: &str = "uddsketch_state";
+
+#[derive(Debug)]
+pub struct UddSketchState {
+    uddsketch: UDDSketch,
+}
+
+impl UddSketchState {
+    pub fn new(bucket_size: u64, error_rate: f64) -> Self {
+        Self {
+            uddsketch: UDDSketch::new(bucket_size, error_rate),
+        }
+    }
+
+    pub fn udf_impl() -> AggregateUDF {
+        create_udaf(
+            UDDSKETCH_STATE_NAME,
+            vec![DataType::Int64, DataType::Float64, DataType::Float64],
+            Arc::new(DataType::Binary),
+            Volatility::Immutable,
+            Arc::new(|args| {
+                let (bucket_size, error_rate) = downcast_accumulator_args(args)?;
+                Ok(Box::new(UddSketchState::new(bucket_size, error_rate)))
+            }),
+            Arc::new(vec![DataType::Binary]),
+        )
+    }
+
+    fn update(&mut self, value: f64) {
+        self.uddsketch.add_value(value);
+    }
+
+    fn merge(&mut self, raw: &[u8]) {
+        if let Ok(uddsketch) = bincode::deserialize::<UDDSketch>(raw) {
+            if uddsketch.count() != 0 {
+                self.uddsketch.merge_sketch(&uddsketch);
+            }
+        } else {
+            trace!("Warning: Failed to deserialize UDDSketch from {:?}", raw);
+        }
+    }
+}
+
+fn downcast_accumulator_args(args: AccumulatorArgs) -> DfResult<(u64, f64)> {
+    let bucket_size = match args.exprs[0]
+        .as_any()
+        .downcast_ref::<Literal>()
+        .map(|lit| lit.value())
+    {
+        Some(ScalarValue::Int64(Some(value))) => *value as u64,
+        _ => {
+            return not_impl_err!(
+                "{} not supported for bucket size: {}",
+                UDDSKETCH_STATE_NAME,
+                &args.exprs[0]
+            )
+        }
+    };
+
+    let error_rate = match args.exprs[1]
+        .as_any()
+        .downcast_ref::<Literal>()
+        .map(|lit| lit.value())
+    {
+        Some(ScalarValue::Float64(Some(value))) => *value,
+        _ => {
+            return not_impl_err!(
+                "{} not supported for error rate: {}",
+                UDDSKETCH_STATE_NAME,
+                &args.exprs[1]
+            )
+        }
+    };
+
+    Ok((bucket_size, error_rate))
+}
+
+impl DfAccumulator for UddSketchState {
+    fn update_batch(&mut self, values: &[ArrayRef]) -> DfResult<()> {
+        let array = &values[2]; // the third column is data value
+        let f64_array = as_primitive_array::<Float64Type>(array)?;
+        for v in f64_array.iter().flatten() {
+            self.update(v);
+        }
+
+        Ok(())
+    }
+
+    fn evaluate(&mut self) -> DfResult<ScalarValue> {
+        Ok(ScalarValue::Binary(Some(
+            bincode::serialize(&self.uddsketch).map_err(|e| {
+                DataFusionError::Internal(format!("Failed to serialize UDDSketch: {}", e))
+            })?,
+        )))
+    }
+
+    fn size(&self) -> usize {
+        // Base size of UDDSketch struct fields
+        let mut total_size = std::mem::size_of::<f64>() * 3 + // alpha, gamma, values_sum
+                            std::mem::size_of::<u32>() +      // compactions
+                            std::mem::size_of::<u64>() * 2; // max_buckets, num_values
+
+        // Size of buckets (SketchHashMap)
+        // Each bucket entry contains:
+        // - SketchHashKey (enum with i64/Zero/Invalid variants)
+        // - SketchHashEntry (count: u64, next: SketchHashKey)
+        let bucket_entry_size = std::mem::size_of::<SketchHashKey>() + // key
+                               std::mem::size_of::<u64>() +            // count
+                               std::mem::size_of::<SketchHashKey>(); // next
+
+        total_size += self.uddsketch.current_buckets_count() * bucket_entry_size;
+
+        total_size
+    }
+
+    fn state(&mut self) -> DfResult<Vec<ScalarValue>> {
+        Ok(vec![ScalarValue::Binary(Some(
+            bincode::serialize(&self.uddsketch).map_err(|e| {
+                DataFusionError::Internal(format!("Failed to serialize UDDSketch: {}", e))
+            })?,
+        ))])
+    }
+
+    fn merge_batch(&mut self, states: &[ArrayRef]) -> DfResult<()> {
+        let array = &states[0];
+        let binary_array = as_binary_array(array)?;
+        for v in binary_array.iter().flatten() {
+            self.merge(v);
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use datafusion::arrow::array::{BinaryArray, Float64Array};
+
+    use super::*;
+
+    #[test]
+    fn test_uddsketch_state_basic() {
+        let mut state = UddSketchState::new(10, 0.01);
+        state.update(1.0);
+        state.update(2.0);
+        state.update(3.0);
+
+        let result = state.evaluate().unwrap();
+        if let ScalarValue::Binary(Some(bytes)) = result {
+            let deserialized: UDDSketch = bincode::deserialize(&bytes).unwrap();
+            assert_eq!(deserialized.count(), 3);
+        } else {
+            panic!("Expected binary scalar value");
+        }
+    }
+
+    #[test]
+    fn test_uddsketch_state_roundtrip() {
+        let mut state = UddSketchState::new(10, 0.01);
+        state.update(1.0);
+        state.update(2.0);
+
+        // Serialize
+        let serialized = state.evaluate().unwrap();
+
+        // Create new state and merge the serialized data
+        let mut new_state = UddSketchState::new(10, 0.01);
+        if let ScalarValue::Binary(Some(bytes)) = &serialized {
+            new_state.merge(bytes);
+
+            // Verify the merged state matches original by comparing deserialized values
+            let original_sketch: UDDSketch = bincode::deserialize(bytes).unwrap();
+            let new_result = new_state.evaluate().unwrap();
+            if let ScalarValue::Binary(Some(new_bytes)) = new_result {
+                let new_sketch: UDDSketch = bincode::deserialize(&new_bytes).unwrap();
+                assert_eq!(original_sketch.count(), new_sketch.count());
+                assert_eq!(original_sketch.sum(), new_sketch.sum());
+                assert_eq!(original_sketch.mean(), new_sketch.mean());
+                assert_eq!(original_sketch.max_error(), new_sketch.max_error());
+                // Compare a few quantiles to ensure statistical equivalence
+                for q in [0.1, 0.5, 0.9].iter() {
+                    assert!(
+                        (original_sketch.estimate_quantile(*q) - new_sketch.estimate_quantile(*q))
+                            .abs()
+                            < 1e-10,
+                        "Quantile {} mismatch: original={}, new={}",
+                        q,
+                        original_sketch.estimate_quantile(*q),
+                        new_sketch.estimate_quantile(*q)
+                    );
+                }
+            } else {
+                panic!("Expected binary scalar value");
+            }
+        } else {
+            panic!("Expected binary scalar value");
+        }
+    }
+
+    #[test]
+    fn test_uddsketch_state_batch_update() {
+        let mut state = UddSketchState::new(10, 0.01);
+        let values = vec![1.0f64, 2.0, 3.0];
+        let array = Arc::new(Float64Array::from(values)) as ArrayRef;
+
+        state
+            .update_batch(&[array.clone(), array.clone(), array])
+            .unwrap();
+
+        let result = state.evaluate().unwrap();
+        if let ScalarValue::Binary(Some(bytes)) = result {
+            let deserialized: UDDSketch = bincode::deserialize(&bytes).unwrap();
+            assert_eq!(deserialized.count(), 3);
+        } else {
+            panic!("Expected binary scalar value");
+        }
+    }
+
+    #[test]
+    fn test_uddsketch_state_merge_batch() {
+        let mut state1 = UddSketchState::new(10, 0.01);
+        state1.update(1.0);
+        let state1_binary = state1.evaluate().unwrap();
+
+        let mut state2 = UddSketchState::new(10, 0.01);
+        state2.update(2.0);
+        let state2_binary = state2.evaluate().unwrap();
+
+        let mut merged_state = UddSketchState::new(10, 0.01);
+        if let (ScalarValue::Binary(Some(bytes1)), ScalarValue::Binary(Some(bytes2))) =
+            (&state1_binary, &state2_binary)
+        {
+            let binary_array = Arc::new(BinaryArray::from(vec![
+                bytes1.as_slice(),
+                bytes2.as_slice(),
+            ])) as ArrayRef;
+            merged_state.merge_batch(&[binary_array]).unwrap();
+
+            let result = merged_state.evaluate().unwrap();
+            if let ScalarValue::Binary(Some(bytes)) = result {
+                let deserialized: UDDSketch = bincode::deserialize(&bytes).unwrap();
+                assert_eq!(deserialized.count(), 2);
+            } else {
+                panic!("Expected binary scalar value");
+            }
+        } else {
+            panic!("Expected binary scalar values");
+        }
+    }
+
+    #[test]
+    fn test_uddsketch_state_size() {
+        let mut state = UddSketchState::new(10, 0.01);
+        let initial_size = state.size();
+
+        // Add some values to create buckets
+        state.update(1.0);
+        state.update(2.0);
+        state.update(3.0);
+
+        let size_with_values = state.size();
+        assert!(
+            size_with_values > initial_size,
+            "Size should increase after adding values: initial={}, with_values={}",
+            initial_size,
+            size_with_values
+        );
+
+        // Verify size increases with more buckets
+        state.update(10.0); // This should create a new bucket
+        assert!(
+            state.size() > size_with_values,
+            "Size should increase after adding new bucket: prev={}, new={}",
+            size_with_values,
+            state.size()
+        );
+    }
+}
--- a/src/common/function/src/function_registry.rs
+++ b/src/common/function/src/function_registry.rs
@@ -26,6 +26,7 @@ use crate::scalars::json::JsonFunction;
 use crate::scalars::matches::MatchesFunction;
 use crate::scalars::math::MathFunction;
 use crate::scalars::timestamp::TimestampFunction;
+use crate::scalars::uddsketch_calc::UddSketchCalcFunction;
 use crate::scalars::vector::VectorFunction;
 use crate::system::SystemFunction;
 use crate::table::TableFunction;
@@ -105,6 +106,7 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
    TimestampFunction::register(&function_registry);
    DateFunction::register(&function_registry);
    ExpressionFunction::register(&function_registry);
+    UddSketchCalcFunction::register(&function_registry);

    // Aggregate functions
    AggregateFunctions::register(&function_registry);
--- a/src/common/function/src/lib.rs
+++ b/src/common/function/src/lib.rs
@@ -21,6 +21,7 @@ pub mod scalars;
 mod system;
 mod table;

+pub mod aggr;
 pub mod function;
 pub mod function_registry;
 pub mod handlers;
--- a/src/common/function/src/scalars.rs
+++ b/src/common/function/src/scalars.rs
@@ -25,4 +25,5 @@ pub mod vector;
 #[cfg(test)]
 pub(crate) mod test;
 pub(crate) mod timestamp;
+pub(crate) mod uddsketch_calc;
 pub mod udf;
--- a/src/common/function/src/scalars/json.rs
+++ b/src/common/function/src/scalars/json.rs
@@ -13,7 +13,7 @@
 // limitations under the License.

 use std::sync::Arc;
-mod json_get;
+pub mod json_get;
 mod json_is;
 mod json_path_exists;
 mod json_path_match;
--- a/src/common/function/src/scalars/uddsketch_calc.rs
+++ b/src/common/function/src/scalars/uddsketch_calc.rs
@@ -0,0 +1,211 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Implementation of the scalar function `uddsketch_calc`.
+
+use std::fmt;
+use std::fmt::Display;
+use std::sync::Arc;
+
+use common_query::error::{DowncastVectorSnafu, InvalidFuncArgsSnafu, Result};
+use common_query::prelude::{Signature, Volatility};
+use datatypes::data_type::ConcreteDataType;
+use datatypes::prelude::Vector;
+use datatypes::scalars::{ScalarVector, ScalarVectorBuilder};
+use datatypes::vectors::{BinaryVector, Float64VectorBuilder, MutableVector, VectorRef};
+use snafu::OptionExt;
+use uddsketch::UDDSketch;
+
+use crate::function::{Function, FunctionContext};
+use crate::function_registry::FunctionRegistry;
+
+const NAME: &str = "uddsketch_calc";
+
+/// UddSketchCalcFunction implements the scalar function `uddsketch_calc`.
+///
+/// It accepts two arguments:
+/// 1. A quantile fraction (as f64, e.g. 0.95 for p95) at which to estimate the value.
+/// 2. The serialized UDDSketch state, as produced by the aggregator (binary).
+///
+/// For each row, it deserializes the sketch and returns the computed quantile value.
+#[derive(Debug, Default)]
+pub struct UddSketchCalcFunction;
+
+impl UddSketchCalcFunction {
+    pub fn register(registry: &FunctionRegistry) {
+        registry.register(Arc::new(UddSketchCalcFunction));
+    }
+}
+
+impl Display for UddSketchCalcFunction {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", NAME.to_ascii_uppercase())
+    }
+}
+
+impl Function for UddSketchCalcFunction {
+    fn name(&self) -> &str {
+        NAME
+    }
+
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::float64_datatype())
+    }
+
+    fn signature(&self) -> Signature {
+        // First argument: percentile (float64)
+        // Second argument: UDDSketch state (binary)
+        Signature::exact(
+            vec![
+                ConcreteDataType::float64_datatype(),
+                ConcreteDataType::binary_datatype(),
+            ],
+            Volatility::Immutable,
+        )
+    }
+
+    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        if columns.len() != 2 {
+            return InvalidFuncArgsSnafu {
+                err_msg: format!("uddsketch_calc expects 2 arguments, got {}", columns.len()),
+            }
+            .fail();
+        }
+
+        let perc_vec = &columns[0];
+        let sketch_vec = columns[1]
+            .as_any()
+            .downcast_ref::<BinaryVector>()
+            .with_context(|| DowncastVectorSnafu {
+                err_msg: format!("expect BinaryVector, got {}", columns[1].vector_type_name()),
+            })?;
+        let len = sketch_vec.len();
+        let mut builder = Float64VectorBuilder::with_capacity(len);
+
+        for i in 0..len {
+            let perc_opt = perc_vec.get(i).as_f64_lossy();
+            let sketch_opt = sketch_vec.get_data(i);
+
+            if sketch_opt.is_none() || perc_opt.is_none() {
+                builder.push_null();
+                continue;
+            }
+
+            let sketch_bytes = sketch_opt.unwrap();
+            let perc = perc_opt.unwrap();
+
+            // Deserialize the UDDSketch from its bincode representation
+            let sketch: UDDSketch = match bincode::deserialize(sketch_bytes) {
+                Ok(s) => s,
+                Err(e) => {
+                    common_telemetry::trace!("Failed to deserialize UDDSketch: {}", e);
+                    builder.push_null();
+                    continue;
+                }
+            };
+
+            // Compute the estimated quantile from the sketch
+            let result = sketch.estimate_quantile(perc);
+            builder.push(Some(result));
+        }
+
+        Ok(builder.to_vector())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use datatypes::vectors::{BinaryVector, Float64Vector};
+
+    use super::*;
+
+    #[test]
+    fn test_uddsketch_calc_function() {
+        let function = UddSketchCalcFunction;
+        assert_eq!("uddsketch_calc", function.name());
+        assert_eq!(
+            ConcreteDataType::float64_datatype(),
+            function
+                .return_type(&[ConcreteDataType::float64_datatype()])
+                .unwrap()
+        );
+
+        // Create a test sketch
+        let mut sketch = UDDSketch::new(128, 0.01);
+        sketch.add_value(10.0);
+        sketch.add_value(20.0);
+        sketch.add_value(30.0);
+        sketch.add_value(40.0);
+        sketch.add_value(50.0);
+        sketch.add_value(60.0);
+        sketch.add_value(70.0);
+        sketch.add_value(80.0);
+        sketch.add_value(90.0);
+        sketch.add_value(100.0);
+
+        // Get expected values directly from the sketch
+        let expected_p50 = sketch.estimate_quantile(0.5);
+        let expected_p90 = sketch.estimate_quantile(0.9);
+        let expected_p95 = sketch.estimate_quantile(0.95);
+
+        let serialized = bincode::serialize(&sketch).unwrap();
+        let percentiles = vec![0.5, 0.9, 0.95];
+
+        let args: Vec<VectorRef> = vec![
+            Arc::new(Float64Vector::from_vec(percentiles.clone())),
+            Arc::new(BinaryVector::from(vec![Some(serialized.clone()); 3])),
+        ];
+
+        let result = function.eval(FunctionContext::default(), &args).unwrap();
+        assert_eq!(result.len(), 3);
+
+        // Test median (p50)
+        assert!(
+            matches!(result.get(0), datatypes::value::Value::Float64(v) if (v - expected_p50).abs() < 1e-10)
+        );
+        // Test p90
+        assert!(
+            matches!(result.get(1), datatypes::value::Value::Float64(v) if (v - expected_p90).abs() < 1e-10)
+        );
+        // Test p95
+        assert!(
+            matches!(result.get(2), datatypes::value::Value::Float64(v) if (v - expected_p95).abs() < 1e-10)
+        );
+    }
+
+    #[test]
+    fn test_uddsketch_calc_function_errors() {
+        let function = UddSketchCalcFunction;
+
+        // Test with invalid number of arguments
+        let args: Vec<VectorRef> = vec![Arc::new(Float64Vector::from_vec(vec![0.95]))];
+        let result = function.eval(FunctionContext::default(), &args);
+        assert!(result.is_err());
+        assert!(result
+            .unwrap_err()
+            .to_string()
+            .contains("uddsketch_calc expects 2 arguments"));
+
+        // Test with invalid binary data
+        let args: Vec<VectorRef> = vec![
+            Arc::new(Float64Vector::from_vec(vec![0.95])),
+            Arc::new(BinaryVector::from(vec![Some(vec![1, 2, 3])])), // Invalid binary data
+        ];
+        let result = function.eval(FunctionContext::default(), &args).unwrap();
+        assert_eq!(result.len(), 1);
+        assert!(matches!(result.get(0), datatypes::value::Value::Null));
+    }
+}
--- a/src/common/grpc-expr/Cargo.toml
+++ b/src/common/grpc-expr/Cargo.toml
@@ -22,4 +22,4 @@ store-api.workspace = true
 table.workspace = true

 [dev-dependencies]
-paste = "1.0"
+paste.workspace = true
--- a/src/common/grpc-expr/src/alter.rs
+++ b/src/common/grpc-expr/src/alter.rs
@@ -15,13 +15,14 @@
 use api::helper::ColumnDataTypeWrapper;
 use api::v1::add_column_location::LocationType;
 use api::v1::alter_table_expr::Kind;
-use api::v1::column_def::as_fulltext_option;
+use api::v1::column_def::{as_fulltext_option, as_skipping_index_type};
 use api::v1::{
    column_def, AddColumnLocation as Location, AlterTableExpr, Analyzer, CreateTableExpr,
    DropColumns, ModifyColumnTypes, RenameTable, SemanticType,
+    SkippingIndexType as PbSkippingIndexType,
 };
 use common_query::AddColumnLocation;
-use datatypes::schema::{ColumnSchema, FulltextOptions, RawSchema};
+use datatypes::schema::{ColumnSchema, FulltextOptions, RawSchema, SkippingIndexOptions};
 use snafu::{ensure, OptionExt, ResultExt};
 use store_api::region_request::{SetRegionOption, UnsetRegionOption};
 use table::metadata::TableId;
@@ -31,7 +32,8 @@ use table::requests::{
 };

 use crate::error::{
-    InvalidColumnDefSnafu, InvalidSetFulltextOptionRequestSnafu, InvalidSetTableOptionRequestSnafu,
+    InvalidColumnDefSnafu, InvalidSetFulltextOptionRequestSnafu,
+    InvalidSetSkippingIndexOptionRequestSnafu, InvalidSetTableOptionRequestSnafu,
    InvalidUnsetTableOptionRequestSnafu, MissingAlterIndexOptionSnafu, MissingFieldSnafu,
    MissingTimestampColumnSnafu, Result, UnknownLocationTypeSnafu,
 };
@@ -137,6 +139,18 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<
                        column_name: i.column_name,
                    },
                },
+                api::v1::set_index::Options::Skipping(s) => AlterKind::SetIndex {
+                    options: SetIndexOptions::Skipping {
+                        column_name: s.column_name,
+                        options: SkippingIndexOptions {
+                            granularity: s.granularity as u32,
+                            index_type: as_skipping_index_type(
+                                PbSkippingIndexType::try_from(s.skipping_index_type)
+                                    .context(InvalidSetSkippingIndexOptionRequestSnafu)?,
+                            ),
+                        },
+                    },
+                },
            },
            None => return MissingAlterIndexOptionSnafu.fail(),
        },
@@ -152,6 +166,11 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<
                        column_name: i.column_name,
                    },
                },
+                api::v1::unset_index::Options::Skipping(s) => AlterKind::UnsetIndex {
+                    options: UnsetIndexOptions::Skipping {
+                        column_name: s.column_name,
+                    },
+                },
            },
            None => return MissingAlterIndexOptionSnafu.fail(),
        },
--- a/src/common/grpc-expr/src/error.rs
+++ b/src/common/grpc-expr/src/error.rs
@@ -140,6 +140,14 @@ pub enum Error {
        error: prost::UnknownEnumValue,
    },

+    #[snafu(display("Invalid set skipping index option request"))]
+    InvalidSetSkippingIndexOptionRequest {
+        #[snafu(implicit)]
+        location: Location,
+        #[snafu(source)]
+        error: prost::UnknownEnumValue,
+    },
+
    #[snafu(display("Missing alter index options"))]
    MissingAlterIndexOption {
        #[snafu(implicit)]
@@ -171,6 +179,7 @@ impl ErrorExt for Error {
            Error::InvalidSetTableOptionRequest { .. }
            | Error::InvalidUnsetTableOptionRequest { .. }
            | Error::InvalidSetFulltextOptionRequest { .. }
+            | Error::InvalidSetSkippingIndexOptionRequest { .. }
            | Error::MissingAlterIndexOption { .. } => StatusCode::InvalidArguments,
        }
    }
--- a/src/common/grpc-expr/src/insert.rs
+++ b/src/common/grpc-expr/src/insert.rs
@@ -14,37 +14,12 @@

 use api::helper;
 use api::v1::column::Values;
-use api::v1::{Column, CreateTableExpr};
 use common_base::BitVec;
 use datatypes::data_type::{ConcreteDataType, DataType};
 use datatypes::prelude::VectorRef;
 use snafu::{ensure, ResultExt};
-use table::metadata::TableId;
-use table::table_reference::TableReference;

 use crate::error::{CreateVectorSnafu, Result, UnexpectedValuesLengthSnafu};
-use crate::util;
-use crate::util::ColumnExpr;
-
-/// Try to build create table request from insert data.
-pub fn build_create_expr_from_insertion(
-    catalog_name: &str,
-    schema_name: &str,
-    table_id: Option<TableId>,
-    table_name: &str,
-    columns: &[Column],
-    engine: &str,
-) -> Result<CreateTableExpr> {
-    let table_name = TableReference::full(catalog_name, schema_name, table_name);
-    let column_exprs = ColumnExpr::from_columns(columns);
-    util::build_create_table_expr(
-        table_id,
-        &table_name,
-        column_exprs,
-        engine,
-        "Created on insertion",
-    )
-}

 pub(crate) fn add_values_to_builder(
    data_type: ConcreteDataType,
@@ -87,276 +62,7 @@ fn is_null(null_mask: &BitVec, idx: usize) -> Option<bool> {

 #[cfg(test)]
 mod tests {
-    use std::sync::Arc;
-    use std::{assert_eq, vec};
-
-    use api::helper::ColumnDataTypeWrapper;
-    use api::v1::column::Values;
-    use api::v1::column_data_type_extension::TypeExt;
-    use api::v1::{
-        Column, ColumnDataType, ColumnDataTypeExtension, Decimal128, DecimalTypeExtension,
-        IntervalMonthDayNano, SemanticType,
-    };
-    use common_base::BitVec;
-    use common_catalog::consts::MITO_ENGINE;
-    use common_time::interval::IntervalUnit;
-    use common_time::timestamp::TimeUnit;
-    use datatypes::data_type::ConcreteDataType;
-    use datatypes::schema::{ColumnSchema, SchemaBuilder};
-    use snafu::ResultExt;
-
    use super::*;
-    use crate::error;
-    use crate::error::ColumnDataTypeSnafu;
-
-    #[inline]
-    fn build_column_schema(
-        column_name: &str,
-        datatype: i32,
-        nullable: bool,
-    ) -> error::Result<ColumnSchema> {
-        let datatype_wrapper =
-            ColumnDataTypeWrapper::try_new(datatype, None).context(ColumnDataTypeSnafu)?;
-
-        Ok(ColumnSchema::new(
-            column_name,
-            datatype_wrapper.into(),
-            nullable,
-        ))
-    }
-
-    #[test]
-    fn test_build_create_table_request() {
-        let table_id = Some(10);
-        let table_name = "test_metric";
-
-        assert!(
-            build_create_expr_from_insertion("", "", table_id, table_name, &[], MITO_ENGINE)
-                .is_err()
-        );
-
-        let insert_batch = mock_insert_batch();
-
-        let create_expr = build_create_expr_from_insertion(
-            "",
-            "",
-            table_id,
-            table_name,
-            &insert_batch.0,
-            MITO_ENGINE,
-        )
-        .unwrap();
-
-        assert_eq!(table_id, create_expr.table_id.map(|x| x.id));
-        assert_eq!(table_name, create_expr.table_name);
-        assert_eq!("Created on insertion".to_string(), create_expr.desc);
-        assert_eq!(
-            vec![create_expr.column_defs[0].name.clone()],
-            create_expr.primary_keys
-        );
-
-        let column_defs = create_expr.column_defs;
-        assert_eq!(column_defs[5].name, create_expr.time_index);
-        assert_eq!(7, column_defs.len());
-
-        assert_eq!(
-            ConcreteDataType::string_datatype(),
-            ConcreteDataType::from(
-                ColumnDataTypeWrapper::try_new(
-                    column_defs
-                        .iter()
-                        .find(|c| c.name == "host")
-                        .unwrap()
-                        .data_type,
-                    None
-                )
-                .unwrap()
-            )
-        );
-
-        assert_eq!(
-            ConcreteDataType::float64_datatype(),
-            ConcreteDataType::from(
-                ColumnDataTypeWrapper::try_new(
-                    column_defs
-                        .iter()
-                        .find(|c| c.name == "cpu")
-                        .unwrap()
-                        .data_type,
-                    None
-                )
-                .unwrap()
-            )
-        );
-
-        assert_eq!(
-            ConcreteDataType::float64_datatype(),
-            ConcreteDataType::from(
-                ColumnDataTypeWrapper::try_new(
-                    column_defs
-                        .iter()
-                        .find(|c| c.name == "memory")
-                        .unwrap()
-                        .data_type,
-                    None
-                )
-                .unwrap()
-            )
-        );
-
-        assert_eq!(
-            ConcreteDataType::time_datatype(TimeUnit::Millisecond),
-            ConcreteDataType::from(
-                ColumnDataTypeWrapper::try_new(
-                    column_defs
-                        .iter()
-                        .find(|c| c.name == "time")
-                        .unwrap()
-                        .data_type,
-                    None
-                )
-                .unwrap()
-            )
-        );
-
-        assert_eq!(
-            ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
-            ConcreteDataType::from(
-                ColumnDataTypeWrapper::try_new(
-                    column_defs
-                        .iter()
-                        .find(|c| c.name == "interval")
-                        .unwrap()
-                        .data_type,
-                    None
-                )
-                .unwrap()
-            )
-        );
-
-        assert_eq!(
-            ConcreteDataType::timestamp_millisecond_datatype(),
-            ConcreteDataType::from(
-                ColumnDataTypeWrapper::try_new(
-                    column_defs
-                        .iter()
-                        .find(|c| c.name == "ts")
-                        .unwrap()
-                        .data_type,
-                    None
-                )
-                .unwrap()
-            )
-        );
-
-        let decimal_column = column_defs.iter().find(|c| c.name == "decimals").unwrap();
-        assert_eq!(
-            ConcreteDataType::decimal128_datatype(38, 10),
-            ConcreteDataType::from(
-                ColumnDataTypeWrapper::try_new(
-                    decimal_column.data_type,
-                    decimal_column.datatype_extension,
-                )
-                .unwrap()
-            )
-        );
-    }
-
-    #[test]
-    fn test_find_new_columns() {
-        let mut columns = Vec::with_capacity(1);
-        let cpu_column = build_column_schema("cpu", 10, true).unwrap();
-        let ts_column = build_column_schema("ts", 15, false)
-            .unwrap()
-            .with_time_index(true);
-        columns.push(cpu_column);
-        columns.push(ts_column);
-
-        let schema = Arc::new(SchemaBuilder::try_from(columns).unwrap().build().unwrap());
-
-        assert!(
-            util::extract_new_columns(&schema, ColumnExpr::from_columns(&[]))
-                .unwrap()
-                .is_none()
-        );
-
-        let insert_batch = mock_insert_batch();
-
-        let add_columns =
-            util::extract_new_columns(&schema, ColumnExpr::from_columns(&insert_batch.0))
-                .unwrap()
-                .unwrap();
-
-        assert_eq!(5, add_columns.add_columns.len());
-        let host_column = &add_columns.add_columns[0];
-        assert_eq!(
-            ConcreteDataType::string_datatype(),
-            ConcreteDataType::from(
-                ColumnDataTypeWrapper::try_new(
-                    host_column.column_def.as_ref().unwrap().data_type,
-                    None
-                )
-                .unwrap()
-            )
-        );
-        assert!(host_column.add_if_not_exists);
-
-        let memory_column = &add_columns.add_columns[1];
-        assert_eq!(
-            ConcreteDataType::float64_datatype(),
-            ConcreteDataType::from(
-                ColumnDataTypeWrapper::try_new(
-                    memory_column.column_def.as_ref().unwrap().data_type,
-                    None
-                )
-                .unwrap()
-            )
-        );
-        assert!(host_column.add_if_not_exists);
-
-        let time_column = &add_columns.add_columns[2];
-        assert_eq!(
-            ConcreteDataType::time_datatype(TimeUnit::Millisecond),
-            ConcreteDataType::from(
-                ColumnDataTypeWrapper::try_new(
-                    time_column.column_def.as_ref().unwrap().data_type,
-                    None
-                )
-                .unwrap()
-            )
-        );
-        assert!(host_column.add_if_not_exists);
-
-        let interval_column = &add_columns.add_columns[3];
-        assert_eq!(
-            ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
-            ConcreteDataType::from(
-                ColumnDataTypeWrapper::try_new(
-                    interval_column.column_def.as_ref().unwrap().data_type,
-                    None
-                )
-                .unwrap()
-            )
-        );
-        assert!(host_column.add_if_not_exists);
-
-        let decimal_column = &add_columns.add_columns[4];
-        assert_eq!(
-            ConcreteDataType::decimal128_datatype(38, 10),
-            ConcreteDataType::from(
-                ColumnDataTypeWrapper::try_new(
-                    decimal_column.column_def.as_ref().unwrap().data_type,
-                    decimal_column
-                        .column_def
-                        .as_ref()
-                        .unwrap()
-                        .datatype_extension
-                )
-                .unwrap()
-            )
-        );
-        assert!(host_column.add_if_not_exists);
-    }

    #[test]
    fn test_is_null() {
@@ -371,127 +77,4 @@ mod tests {
        assert_eq!(None, is_null(&null_mask, 16));
        assert_eq!(None, is_null(&null_mask, 99));
    }
-
-    fn mock_insert_batch() -> (Vec<Column>, u32) {
-        let row_count = 2;
-
-        let host_vals = Values {
-            string_values: vec!["host1".to_string(), "host2".to_string()],
-            ..Default::default()
-        };
-        let host_column = Column {
-            column_name: "host".to_string(),
-            semantic_type: SemanticType::Tag as i32,
-            values: Some(host_vals),
-            null_mask: vec![0],
-            datatype: ColumnDataType::String as i32,
-            ..Default::default()
-        };
-
-        let cpu_vals = Values {
-            f64_values: vec![0.31],
-            ..Default::default()
-        };
-        let cpu_column = Column {
-            column_name: "cpu".to_string(),
-            semantic_type: SemanticType::Field as i32,
-            values: Some(cpu_vals),
-            null_mask: vec![2],
-            datatype: ColumnDataType::Float64 as i32,
-            ..Default::default()
-        };
-
-        let mem_vals = Values {
-            f64_values: vec![0.1],
-            ..Default::default()
-        };
-        let mem_column = Column {
-            column_name: "memory".to_string(),
-            semantic_type: SemanticType::Field as i32,
-            values: Some(mem_vals),
-            null_mask: vec![1],
-            datatype: ColumnDataType::Float64 as i32,
-            ..Default::default()
-        };
-
-        let time_vals = Values {
-            time_millisecond_values: vec![100, 101],
-            ..Default::default()
-        };
-        let time_column = Column {
-            column_name: "time".to_string(),
-            semantic_type: SemanticType::Field as i32,
-            values: Some(time_vals),
-            null_mask: vec![0],
-            datatype: ColumnDataType::TimeMillisecond as i32,
-            ..Default::default()
-        };
-
-        let interval1 = IntervalMonthDayNano {
-            months: 1,
-            days: 2,
-            nanoseconds: 3,
-        };
-        let interval2 = IntervalMonthDayNano {
-            months: 4,
-            days: 5,
-            nanoseconds: 6,
-        };
-        let interval_vals = Values {
-            interval_month_day_nano_values: vec![interval1, interval2],
-            ..Default::default()
-        };
-        let interval_column = Column {
-            column_name: "interval".to_string(),
-            semantic_type: SemanticType::Field as i32,
-            values: Some(interval_vals),
-            null_mask: vec![0],
-            datatype: ColumnDataType::IntervalMonthDayNano as i32,
-            ..Default::default()
-        };
-
-        let ts_vals = Values {
-            timestamp_millisecond_values: vec![100, 101],
-            ..Default::default()
-        };
-        let ts_column = Column {
-            column_name: "ts".to_string(),
-            semantic_type: SemanticType::Timestamp as i32,
-            values: Some(ts_vals),
-            null_mask: vec![0],
-            datatype: ColumnDataType::TimestampMillisecond as i32,
-            ..Default::default()
-        };
-        let decimal_vals = Values {
-            decimal128_values: vec![Decimal128 { hi: 0, lo: 123 }, Decimal128 { hi: 0, lo: 456 }],
-            ..Default::default()
-        };
-        let decimal_column = Column {
-            column_name: "decimals".to_string(),
-            semantic_type: SemanticType::Field as i32,
-            values: Some(decimal_vals),
-            null_mask: vec![0],
-            datatype: ColumnDataType::Decimal128 as i32,
-            datatype_extension: Some(ColumnDataTypeExtension {
-                type_ext: Some(TypeExt::DecimalType(DecimalTypeExtension {
-                    precision: 38,
-                    scale: 10,
-                })),
-            }),
-            options: None,
-        };
-
-        (
-            vec![
-                host_column,
-                cpu_column,
-                mem_column,
-                time_column,
-                interval_column,
-                ts_column,
-                decimal_column,
-            ],
-            row_count,
-        )
-    }
 }
--- a/src/common/grpc-expr/src/lib.rs
+++ b/src/common/grpc-expr/src/lib.rs
@@ -19,4 +19,3 @@ pub mod insert;
 pub mod util;

 pub use alter::{alter_expr_to_request, create_table_schema};
-pub use insert::build_create_expr_from_insertion;
--- a/src/common/grpc-expr/src/util.rs
+++ b/src/common/grpc-expr/src/util.rs
@@ -236,3 +236,414 @@ pub fn extract_new_columns(
        }))
    }
 }
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+    use std::{assert_eq, vec};
+
+    use api::helper::ColumnDataTypeWrapper;
+    use api::v1::column::Values;
+    use api::v1::column_data_type_extension::TypeExt;
+    use api::v1::{
+        Column, ColumnDataType, ColumnDataTypeExtension, Decimal128, DecimalTypeExtension,
+        IntervalMonthDayNano, SemanticType,
+    };
+    use common_catalog::consts::MITO_ENGINE;
+    use common_time::interval::IntervalUnit;
+    use common_time::timestamp::TimeUnit;
+    use datatypes::data_type::ConcreteDataType;
+    use datatypes::schema::{ColumnSchema, SchemaBuilder};
+    use snafu::ResultExt;
+
+    use super::*;
+    use crate::error;
+    use crate::error::ColumnDataTypeSnafu;
+
+    #[inline]
+    fn build_column_schema(
+        column_name: &str,
+        datatype: i32,
+        nullable: bool,
+    ) -> error::Result<ColumnSchema> {
+        // Test helper: turns a raw `datatype` code (without type extension) into a column schema.
+        let wrapper = ColumnDataTypeWrapper::try_new(datatype, None);
+        let wrapper = wrapper.context(ColumnDataTypeSnafu)?;
+        Ok(ColumnSchema::new(
+            column_name,
+            wrapper.into(),
+            nullable,
+        ))
+    }
+
+    fn build_create_expr_from_insertion(
+        catalog_name: &str,
+        schema_name: &str,
+        table_id: Option<TableId>,
+        table_name: &str,
+        columns: &[Column],
+        engine: &str,
+    ) -> Result<CreateTableExpr> {
+        // Test helper: builds a create-table expression from the columns of an insert batch.
+        let table_ref = TableReference::full(catalog_name, schema_name, table_name);
+        build_create_table_expr(
+            table_id,
+            &table_ref,
+            ColumnExpr::from_columns(columns),
+            engine,
+            "Created on insertion",
+        )
+    }
+
+    #[test]
+    fn test_build_create_table_request() {
+        let table_id = Some(10);
+        let table_name = "test_metric";
+        // Building a create-table expression from an empty column list is expected to fail.
+        assert!(
+            build_create_expr_from_insertion("", "", table_id, table_name, &[], MITO_ENGINE)
+                .is_err()
+        );
+
+        let insert_batch = mock_insert_batch();
+
+        let create_expr = build_create_expr_from_insertion(
+            "",
+            "",
+            table_id,
+            table_name,
+            &insert_batch.0,
+            MITO_ENGINE,
+        )
+        .unwrap();
+        // Id, name and description match the inputs; the first column is the sole primary key.
+        assert_eq!(table_id, create_expr.table_id.map(|x| x.id));
+        assert_eq!(table_name, create_expr.table_name);
+        assert_eq!("Created on insertion".to_string(), create_expr.desc);
+        assert_eq!(
+            vec![create_expr.column_defs[0].name.clone()],
+            create_expr.primary_keys
+        );
+        // Index 5 is presumably `ts`, the sixth mocked column and the only `Timestamp` one.
+        let column_defs = create_expr.column_defs;
+        assert_eq!(column_defs[5].name, create_expr.time_index);
+        assert_eq!(7, column_defs.len());
+        // Every column must keep the data type declared in the mocked insert batch.
+        assert_eq!(
+            ConcreteDataType::string_datatype(),
+            ConcreteDataType::from(
+                ColumnDataTypeWrapper::try_new(
+                    column_defs
+                        .iter()
+                        .find(|c| c.name == "host")
+                        .unwrap()
+                        .data_type,
+                    None
+                )
+                .unwrap()
+            )
+        );
+
+        assert_eq!(
+            ConcreteDataType::float64_datatype(),
+            ConcreteDataType::from(
+                ColumnDataTypeWrapper::try_new(
+                    column_defs
+                        .iter()
+                        .find(|c| c.name == "cpu")
+                        .unwrap()
+                        .data_type,
+                    None
+                )
+                .unwrap()
+            )
+        );
+
+        assert_eq!(
+            ConcreteDataType::float64_datatype(),
+            ConcreteDataType::from(
+                ColumnDataTypeWrapper::try_new(
+                    column_defs
+                        .iter()
+                        .find(|c| c.name == "memory")
+                        .unwrap()
+                        .data_type,
+                    None
+                )
+                .unwrap()
+            )
+        );
+
+        assert_eq!(
+            ConcreteDataType::time_datatype(TimeUnit::Millisecond),
+            ConcreteDataType::from(
+                ColumnDataTypeWrapper::try_new(
+                    column_defs
+                        .iter()
+                        .find(|c| c.name == "time")
+                        .unwrap()
+                        .data_type,
+                    None
+                )
+                .unwrap()
+            )
+        );
+
+        assert_eq!(
+            ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
+            ConcreteDataType::from(
+                ColumnDataTypeWrapper::try_new(
+                    column_defs
+                        .iter()
+                        .find(|c| c.name == "interval")
+                        .unwrap()
+                        .data_type,
+                    None
+                )
+                .unwrap()
+            )
+        );
+
+        assert_eq!(
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            ConcreteDataType::from(
+                ColumnDataTypeWrapper::try_new(
+                    column_defs
+                        .iter()
+                        .find(|c| c.name == "ts")
+                        .unwrap()
+                        .data_type,
+                    None
+                )
+                .unwrap()
+            )
+        );
+
+        let decimal_column = column_defs.iter().find(|c| c.name == "decimals").unwrap();
+        assert_eq!(
+            ConcreteDataType::decimal128_datatype(38, 10),
+            ConcreteDataType::from(
+                ColumnDataTypeWrapper::try_new(
+                    decimal_column.data_type,
+                    decimal_column.datatype_extension,
+                )
+                .unwrap()
+            )
+        );
+    }
+
+    #[test]
+    fn test_find_new_columns() {
+        let mut columns = Vec::with_capacity(2);
+        let cpu_column = build_column_schema("cpu", 10, true).unwrap();
+        let ts_column = build_column_schema("ts", 15, false)
+            .unwrap()
+            .with_time_index(true);
+        columns.push(cpu_column);
+        columns.push(ts_column);
+        // The existing schema only contains `cpu` and the `ts` time index.
+        let schema = Arc::new(SchemaBuilder::try_from(columns).unwrap().build().unwrap());
+        // An empty column list must not yield any column to add.
+        assert!(extract_new_columns(&schema, ColumnExpr::from_columns(&[]))
+            .unwrap()
+            .is_none());
+
+        let insert_batch = mock_insert_batch();
+
+        let add_columns = extract_new_columns(&schema, ColumnExpr::from_columns(&insert_batch.0))
+            .unwrap()
+            .unwrap();
+        // `host`, `memory`, `time`, `interval` and `decimals` are not in the schema yet.
+        assert_eq!(5, add_columns.add_columns.len());
+        let host_column = &add_columns.add_columns[0];
+        assert_eq!(
+            ConcreteDataType::string_datatype(),
+            ConcreteDataType::from(
+                ColumnDataTypeWrapper::try_new(
+                    host_column.column_def.as_ref().unwrap().data_type,
+                    None
+                )
+                .unwrap()
+            )
+        );
+        assert!(host_column.add_if_not_exists);
+        // Each remaining column is checked through its own binding, not through `host_column`.
+        let memory_column = &add_columns.add_columns[1];
+        assert_eq!(
+            ConcreteDataType::float64_datatype(),
+            ConcreteDataType::from(
+                ColumnDataTypeWrapper::try_new(
+                    memory_column.column_def.as_ref().unwrap().data_type,
+                    None
+                )
+                .unwrap()
+            )
+        );
+        assert!(memory_column.add_if_not_exists);
+
+        let time_column = &add_columns.add_columns[2];
+        assert_eq!(
+            ConcreteDataType::time_datatype(TimeUnit::Millisecond),
+            ConcreteDataType::from(
+                ColumnDataTypeWrapper::try_new(
+                    time_column.column_def.as_ref().unwrap().data_type,
+                    None
+                )
+                .unwrap()
+            )
+        );
+        assert!(time_column.add_if_not_exists);
+
+        let interval_column = &add_columns.add_columns[3];
+        assert_eq!(
+            ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
+            ConcreteDataType::from(
+                ColumnDataTypeWrapper::try_new(
+                    interval_column.column_def.as_ref().unwrap().data_type,
+                    None
+                )
+                .unwrap()
+            )
+        );
+        assert!(interval_column.add_if_not_exists);
+
+        let decimal_column = &add_columns.add_columns[4];
+        assert_eq!(
+            ConcreteDataType::decimal128_datatype(38, 10),
+            ConcreteDataType::from(
+                ColumnDataTypeWrapper::try_new(
+                    decimal_column.column_def.as_ref().unwrap().data_type,
+                    decimal_column
+                        .column_def
+                        .as_ref()
+                        .unwrap()
+                        .datatype_extension
+                )
+                .unwrap()
+            )
+        );
+        assert!(decimal_column.add_if_not_exists);
+    }
+
+    fn mock_insert_batch() -> (Vec<Column>, u32) {
+        let row_count = 2;
+        // Seven columns for a two-row batch: one tag, one timestamp and five fields.
+        let host_vals = Values {
+            string_values: vec!["host1".to_string(), "host2".to_string()],
+            ..Default::default()
+        };
+        let host_column = Column {
+            column_name: "host".to_string(),
+            semantic_type: SemanticType::Tag as i32,
+            values: Some(host_vals),
+            null_mask: vec![0],
+            datatype: ColumnDataType::String as i32,
+            ..Default::default()
+        };
+        // `cpu` has one value for two rows; `null_mask` 0b10 presumably flags the missing one.
+        let cpu_vals = Values {
+            f64_values: vec![0.31],
+            ..Default::default()
+        };
+        let cpu_column = Column {
+            column_name: "cpu".to_string(),
+            semantic_type: SemanticType::Field as i32,
+            values: Some(cpu_vals),
+            null_mask: vec![2],
+            datatype: ColumnDataType::Float64 as i32,
+            ..Default::default()
+        };
+        // `memory` likewise has a single value; its `null_mask` byte is 0b01.
+        let mem_vals = Values {
+            f64_values: vec![0.1],
+            ..Default::default()
+        };
+        let mem_column = Column {
+            column_name: "memory".to_string(),
+            semantic_type: SemanticType::Field as i32,
+            values: Some(mem_vals),
+            null_mask: vec![1],
+            datatype: ColumnDataType::Float64 as i32,
+            ..Default::default()
+        };
+
+        let time_vals = Values {
+            time_millisecond_values: vec![100, 101],
+            ..Default::default()
+        };
+        let time_column = Column {
+            column_name: "time".to_string(),
+            semantic_type: SemanticType::Field as i32,
+            values: Some(time_vals),
+            null_mask: vec![0],
+            datatype: ColumnDataType::TimeMillisecond as i32,
+            ..Default::default()
+        };
+
+        let interval1 = IntervalMonthDayNano {
+            months: 1,
+            days: 2,
+            nanoseconds: 3,
+        };
+        let interval2 = IntervalMonthDayNano {
+            months: 4,
+            days: 5,
+            nanoseconds: 6,
+        };
+        let interval_vals = Values {
+            interval_month_day_nano_values: vec![interval1, interval2],
+            ..Default::default()
+        };
+        let interval_column = Column {
+            column_name: "interval".to_string(),
+            semantic_type: SemanticType::Field as i32,
+            values: Some(interval_vals),
+            null_mask: vec![0],
+            datatype: ColumnDataType::IntervalMonthDayNano as i32,
+            ..Default::default()
+        };
+
+        let ts_vals = Values {
+            timestamp_millisecond_values: vec![100, 101],
+            ..Default::default()
+        };
+        let ts_column = Column {
+            column_name: "ts".to_string(),
+            semantic_type: SemanticType::Timestamp as i32,
+            values: Some(ts_vals),
+            null_mask: vec![0],
+            datatype: ColumnDataType::TimestampMillisecond as i32,
+            ..Default::default()
+        };
+        let decimal_vals = Values {
+            decimal128_values: vec![Decimal128 { hi: 0, lo: 123 }, Decimal128 { hi: 0, lo: 456 }],
+            ..Default::default()
+        };
+        let decimal_column = Column {
+            column_name: "decimals".to_string(),
+            semantic_type: SemanticType::Field as i32,
+            values: Some(decimal_vals),
+            null_mask: vec![0],
+            datatype: ColumnDataType::Decimal128 as i32,
+            datatype_extension: Some(ColumnDataTypeExtension {
+                type_ext: Some(TypeExt::DecimalType(DecimalTypeExtension {
+                    precision: 38,
+                    scale: 10,
+                })),
+            }),
+            options: None,
+        };
+        // Keep this order: the tests index into it (`host` first, `ts` at position 5).
+        (
+            vec![
+                host_column,
+                cpu_column,
+                mem_column,
+                time_column,
+                interval_column,
+                ts_column,
+                decimal_column,
+            ],
+            row_count,
+        )
+    }
+}
--- a/src/common/meta/Cargo.toml
+++ b/src/common/meta/Cargo.toml
@@ -6,7 +6,7 @@ license.workspace = true

 [features]
 testing = []
-pg_kvbackend = ["dep:tokio-postgres", "dep:backon"]
+pg_kvbackend = ["dep:tokio-postgres", "dep:backon", "dep:deadpool-postgres", "dep:deadpool"]

 [lints]
 workspace = true
@@ -36,8 +36,8 @@ common-wal.workspace = true
 datafusion-common.workspace = true
 datafusion-expr.workspace = true
 datatypes.workspace = true
-deadpool.workspace = true
-deadpool-postgres.workspace = true
+deadpool = { workspace = true, optional = true }
+deadpool-postgres = { workspace = true, optional = true }
 derive_builder.workspace = true
 etcd-client.workspace = true
 futures.workspace = true
--- a/src/common/meta/src/cache/flow/table_flownode.rs
+++ b/src/common/meta/src/cache/flow/table_flownode.rs
@@ -16,7 +16,6 @@ use std::collections::HashMap;
 use std::sync::Arc;

 use futures::future::BoxFuture;
-use futures::TryStreamExt;
 use moka::future::Cache;
 use moka::ops::compute::Op;
 use table::metadata::TableId;
@@ -54,9 +53,13 @@ fn init_factory(table_flow_manager: TableFlowManagerRef) -> Initializer<TableId,
        Box::pin(async move {
            table_flow_manager
                .flows(table_id)
-                .map_ok(|(key, value)| (key.flownode_id(), value.peer))
-                .try_collect::<HashMap<_, _>>()
                .await
+                .map(|flows| {
+                    flows
+                        .into_iter()
+                        .map(|(key, value)| (key.flownode_id(), value.peer))
+                        .collect::<HashMap<_, _>>()
+                })
                // We must cache the `HashSet` even if it's empty,
                // to avoid future requests to the remote storage next time;
                // If the value is added to the remote storage,
--- a/src/common/meta/src/cluster.rs
+++ b/src/common/meta/src/cluster.rs
@@ -12,8 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::hash::{DefaultHasher, Hash, Hasher};
 use std::str::FromStr;

+use api::v1::meta::HeartbeatRequest;
 use common_error::ext::ErrorExt;
 use lazy_static::lazy_static;
 use regex::Regex;
@@ -58,7 +60,7 @@ pub trait ClusterInfo {
 ///
 /// This key cannot be used to describe the `Metasrv` because the `Metasrv` does not have
 /// a `cluster_id`, it serves multiple clusters.
-#[derive(Debug, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize)]
 pub struct NodeInfoKey {
    /// The cluster id.
    pub cluster_id: ClusterId,
@@ -69,6 +71,28 @@ pub struct NodeInfoKey {
 }

 impl NodeInfoKey {
+    /// Builds a `NodeInfoKey` from a heartbeat request. Returns `None` when the header or peer
+    /// is missing, or when the header's role cannot be converted into a `Role`.
+    pub fn new(request: &HeartbeatRequest) -> Option<Self> {
+        let HeartbeatRequest { header, peer, .. } = request;
+        let header = header.as_ref()?;
+        let peer = peer.as_ref()?;
+
+        let role = header.role.try_into().ok()?;
+        let node_id = match role {
+            // Because the Frontend is stateless, it's too easy to neglect choosing a unique id
+            // for it when setting up a cluster. So we calculate its id from its address.
+            Role::Frontend => calculate_node_id(&peer.addr),
+            _ => peer.id, // every other role keeps the id carried by the peer
+        };
+
+        Some(NodeInfoKey {
+            cluster_id: header.cluster_id,
+            role,
+            node_id,
+        })
+    }
+
    pub fn key_prefix_with_cluster_id(cluster_id: u64) -> String {
        format!("{}-{}-", CLUSTER_NODE_INFO_PREFIX, cluster_id)
    }
@@ -83,6 +107,13 @@ impl NodeInfoKey {
    }
 }

+/// Calculate the node's id by hashing its address with a freshly created `DefaultHasher`.
+fn calculate_node_id(addr: &str) -> u64 {
+    let mut hasher = DefaultHasher::new(); // NOTE(review): algorithm may change between Rust releases
+    addr.hash(&mut hasher);
+    hasher.finish()
+}
+
 /// The information of a node in the cluster.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct NodeInfo {
@@ -100,7 +131,7 @@ pub struct NodeInfo {
    pub start_time_ms: u64,
 }

-#[derive(Debug, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize)]
 pub enum Role {
    Datanode,
    Frontend,
@@ -271,6 +302,7 @@ impl TryFrom<i32> for Role {
 mod tests {
    use std::assert_matches::assert_matches;

+    use super::*;
    use crate::cluster::Role::{Datanode, Frontend};
    use crate::cluster::{DatanodeStatus, NodeInfo, NodeInfoKey, NodeStatus};
    use crate::peer::Peer;
@@ -338,4 +370,26 @@ mod tests {
        let prefix = NodeInfoKey::key_prefix_with_role(2, Frontend);
        assert_eq!(prefix, "__meta_cluster_node_info-2-1-");
    }
+
+    #[test]
+    fn test_calculate_node_id_from_addr() {
+        // The empty address hashes to the same id on every call
+        assert_eq!(calculate_node_id(""), calculate_node_id(""));
+
+        // The same address always yields the same id
+        let addr1 = "127.0.0.1:8080";
+        let id1 = calculate_node_id(addr1);
+        let id2 = calculate_node_id(addr1);
+        assert_eq!(id1, id2);
+
+        // Two distinct addresses are expected to yield distinct ids
+        let addr2 = "127.0.0.1:8081";
+        let id3 = calculate_node_id(addr2);
+        assert_ne!(id1, id3);
+
+        // A longer, domain-style address is hashed as well
+        let long_addr = "very.long.domain.name.example.com:9999";
+        let id4 = calculate_node_id(long_addr);
+        assert!(id4 > 0); // NOTE(review): only rules out a hash of exactly 0
+    }
 }
--- a/src/common/meta/src/ddl/create_flow.rs
+++ b/src/common/meta/src/ddl/create_flow.rs
@@ -15,6 +15,7 @@
 mod metadata;

 use std::collections::BTreeMap;
+use std::fmt;

 use api::v1::flow::flow_request::Body as PbFlowRequest;
 use api::v1::flow::{CreateRequest, FlowRequest, FlowRequestHeader};
@@ -28,7 +29,6 @@ use common_procedure::{
 use common_telemetry::info;
 use common_telemetry::tracing_context::TracingContext;
 use futures::future::join_all;
-use futures::TryStreamExt;
 use itertools::Itertools;
 use serde::{Deserialize, Serialize};
 use snafu::{ensure, ResultExt};
@@ -77,6 +77,7 @@ impl CreateFlowProcedure {
                query_context,
                state: CreateFlowState::Prepare,
                prev_flow_info_value: None,
+                flow_type: None,
            },
        }
    }
@@ -104,7 +105,7 @@ impl CreateFlowProcedure {
        if create_if_not_exists && or_replace {
            // this is forbidden because not clear what does that mean exactly
            return error::UnsupportedSnafu {
-                operation: "Create flow with both `IF NOT EXISTS` and `OR REPLACE`".to_string(),
+                operation: "Create flow with both `IF NOT EXISTS` and `OR REPLACE`",
            }
            .fail();
        }
@@ -129,9 +130,10 @@ impl CreateFlowProcedure {
                .flow_metadata_manager
                .flow_route_manager()
                .routes(flow_id)
-                .map_ok(|(_, value)| value.peer)
-                .try_collect::<Vec<_>>()
-                .await?;
+                .await?
+                .into_iter()
+                .map(|(_, value)| value.peer)
+                .collect::<Vec<_>>();
            self.data.flow_id = Some(flow_id);
            self.data.peers = peers;
            info!("Replacing flow, flow_id: {}", flow_id);
@@ -175,6 +177,8 @@ impl CreateFlowProcedure {
            self.allocate_flow_id().await?;
        }
        self.data.state = CreateFlowState::CreateFlows;
+        // determine flow type
+        self.data.flow_type = Some(determine_flow_type(&self.data.task));

        Ok(Status::executing(true))
    }
@@ -309,6 +313,11 @@ impl Procedure for CreateFlowProcedure {
    }
 }

+pub fn determine_flow_type(_flow_task: &CreateFlowTask) -> FlowType {
+    // TODO(discord9): determine flow type from the task; for now every task is a recording rule
+    FlowType::RecordingRule
+}
+
 /// The state of [CreateFlowProcedure].
 #[derive(Debug, Clone, Serialize, Deserialize, AsRefStr, PartialEq)]
 pub enum CreateFlowState {
@@ -322,6 +331,35 @@ pub enum CreateFlowState {
    CreateMetadata,
 }

+/// The type of flow. Its `Display` form is one of the string constants defined below.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub enum FlowType {
+    /// The flow is a recording rule task.
+    RecordingRule,
+    /// The flow is a streaming task.
+    Streaming,
+}
+
+impl FlowType {
+    pub const RECORDING_RULE: &str = "recording_rule"; // `Display` output of `RecordingRule`
+    pub const STREAMING: &str = "streaming"; // `Display` output of `Streaming`
+}
+
+impl Default for FlowType {
+    fn default() -> Self {
+        Self::RecordingRule
+    }
+}
+
+impl fmt::Display for FlowType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            FlowType::RecordingRule => write!(f, "{}", FlowType::RECORDING_RULE),
+            FlowType::Streaming => write!(f, "{}", FlowType::STREAMING),
+        }
+    }
+}
+
 /// The serializable data.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct CreateFlowData {
@@ -335,6 +373,7 @@ pub struct CreateFlowData {
    /// For verify if prev value is consistent when need to update flow metadata.
    /// only set when `or_replace` is true.
    pub(crate) prev_flow_info_value: Option<DeserializedValueWithBytes<FlowInfoValue>>,
+    pub(crate) flow_type: Option<FlowType>,
 }

 impl From<&CreateFlowData> for CreateRequest {
@@ -342,7 +381,7 @@ impl From<&CreateFlowData> for CreateRequest {
        let flow_id = value.flow_id.unwrap();
        let source_table_ids = &value.source_table_ids;

-        CreateRequest {
+        let mut req = CreateRequest {
            flow_id: Some(api::v1::FlowId { id: flow_id }),
            source_table_ids: source_table_ids
                .iter()
@@ -356,7 +395,11 @@ impl From<&CreateFlowData> for CreateRequest {
            comment: value.task.comment.clone(),
            sql: value.task.sql.clone(),
            flow_options: value.task.flow_options.clone(),
-        }
+        };
+
+        let flow_type = value.flow_type.unwrap_or_default().to_string();
+        req.flow_options.insert("flow_type".to_string(), flow_type);
+        req
    }
 }

@@ -369,7 +412,7 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
            expire_after,
            comment,
            sql,
-            flow_options: options,
+            flow_options: mut options,
            ..
        } = value.task.clone();

@@ -386,19 +429,21 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
            .map(|(idx, peer)| (idx as u32, FlowRouteValue { peer: peer.clone() }))
            .collect::<Vec<_>>();

-        (
-            FlowInfoValue {
-                source_table_ids: value.source_table_ids.clone(),
-                sink_table_name,
-                flownode_ids,
-                catalog_name,
-                flow_name,
-                raw_sql: sql,
-                expire_after,
-                comment,
-                options,
-            },
-            flow_routes,
-        )
+        let flow_type = value.flow_type.unwrap_or_default().to_string();
+        options.insert("flow_type".to_string(), flow_type);
+
+        let flow_info = FlowInfoValue {
+            source_table_ids: value.source_table_ids.clone(),
+            sink_table_name,
+            flownode_ids,
+            catalog_name,
+            flow_name,
+            raw_sql: sql,
+            expire_after,
+            comment,
+            options,
+        };
+
+        (flow_info, flow_routes)
    }
 }
--- a/src/common/meta/src/ddl/drop_flow/metadata.rs
+++ b/src/common/meta/src/ddl/drop_flow/metadata.rs
@@ -13,7 +13,6 @@
 // limitations under the License.

 use common_catalog::format_full_flow_name;
-use futures::TryStreamExt;
 use snafu::{ensure, OptionExt};

 use crate::ddl::drop_flow::DropFlowProcedure;
@@ -39,9 +38,10 @@ impl DropFlowProcedure {
            .flow_metadata_manager
            .flow_route_manager()
            .routes(self.data.task.flow_id)
-            .map_ok(|(_, value)| value)
-            .try_collect::<Vec<_>>()
-            .await?;
+            .await?
+            .into_iter()
+            .map(|(_, value)| value)
+            .collect::<Vec<_>>();
        ensure!(
            !flow_route_values.is_empty(),
            error::FlowRouteNotFoundSnafu {
--- a/src/common/meta/src/ddl/tests/create_view.rs
+++ b/src/common/meta/src/ddl/tests/create_view.rs
@@ -219,7 +219,7 @@ async fn test_replace_view_metadata() {
        assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
    }

-    // Set `or_replce` to be `true` and try again
+    // Set `or_replace` to be `true` and try again
    task.create_view.or_replace = true;
    task.create_view.logical_plan = vec![4, 5, 6];
    task.create_view.definition = "new_definition".to_string();
--- a/src/common/meta/src/error.rs
+++ b/src/common/meta/src/error.rs
@@ -686,8 +686,8 @@ pub enum Error {
    },

    #[cfg(feature = "pg_kvbackend")]
-    #[snafu(display("Postgres transaction retry failed"))]
-    PostgresTransactionRetryFailed {
+    #[snafu(display("Rds transaction retry failed"))]
+    RdsTransactionRetryFailed {
        #[snafu(implicit)]
        location: Location,
    },
@@ -824,7 +824,7 @@ impl ErrorExt for Error {
            | CreatePostgresPool { .. }
            | GetPostgresConnection { .. }
            | PostgresTransaction { .. }
-            | PostgresTransactionRetryFailed { .. } => StatusCode::Internal,
+            | RdsTransactionRetryFailed { .. } => StatusCode::Internal,
            Error::DatanodeTableInfoNotFound { .. } => StatusCode::Internal,
        }
    }
--- a/src/common/meta/src/key/flow.rs
+++ b/src/common/meta/src/key/flow.rs
@@ -16,9 +16,9 @@ pub mod flow_info;
 pub(crate) mod flow_name;
 pub(crate) mod flow_route;
 pub mod flow_state;
+mod flownode_addr_helper;
 pub(crate) mod flownode_flow;
 pub(crate) mod table_flow;
-
 use std::ops::Deref;
 use std::sync::Arc;

@@ -506,7 +506,6 @@ mod tests {
        let routes = flow_metadata_manager
            .flow_route_manager()
            .routes(flow_id)
-            .try_collect::<Vec<_>>()
            .await
            .unwrap();
        assert_eq!(
@@ -538,7 +537,6 @@ mod tests {
            let nodes = flow_metadata_manager
                .table_flow_manager()
                .flows(table_id)
-                .try_collect::<Vec<_>>()
                .await
                .unwrap();
            assert_eq!(
@@ -727,7 +725,6 @@ mod tests {
        let routes = flow_metadata_manager
            .flow_route_manager()
            .routes(flow_id)
-            .try_collect::<Vec<_>>()
            .await
            .unwrap();
        assert_eq!(
@@ -759,7 +756,6 @@ mod tests {
            let nodes = flow_metadata_manager
                .table_flow_manager()
                .flows(table_id)
-                .try_collect::<Vec<_>>()
                .await
                .unwrap();
            assert_eq!(
--- a/src/common/meta/src/key/flow/flow_route.rs
+++ b/src/common/meta/src/key/flow/flow_route.rs
@@ -12,14 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use futures::stream::BoxStream;
+use futures::TryStreamExt;
 use lazy_static::lazy_static;
 use regex::Regex;
 use serde::{Deserialize, Serialize};
 use snafu::OptionExt;

 use crate::error::{self, Result};
-use crate::key::flow::FlowScoped;
+use crate::key::flow::{flownode_addr_helper, FlowScoped};
+use crate::key::node_address::NodeAddressKey;
 use crate::key::{BytesAdapter, FlowId, FlowPartitionId, MetadataKey, MetadataValue};
 use crate::kv_backend::txn::{Txn, TxnOp};
 use crate::kv_backend::KvBackendRef;
@@ -167,10 +168,7 @@ impl FlowRouteManager {
    }

    /// Retrieves all [FlowRouteValue]s of the specified `flow_id`.
-    pub fn routes(
-        &self,
-        flow_id: FlowId,
-    ) -> BoxStream<'static, Result<(FlowRouteKey, FlowRouteValue)>> {
+    pub async fn routes(&self, flow_id: FlowId) -> Result<Vec<(FlowRouteKey, FlowRouteValue)>> {
        let start_key = FlowRouteKey::range_start_key(flow_id);
        let req = RangeRequest::new().with_prefix(start_key);
        let stream = PaginationStream::new(
@@ -181,7 +179,9 @@ impl FlowRouteManager {
        )
        .into_stream();

-        Box::pin(stream)
+        let mut res = stream.try_collect::<Vec<_>>().await?;
+        self.remap_flow_route_addresses(&mut res).await?;
+        Ok(res)
    }

    /// Builds a create flow routes transaction.
@@ -203,6 +203,28 @@ impl FlowRouteManager {

        Ok(Txn::new().and_then(txns))
    }
+
+    async fn remap_flow_route_addresses(
+        &self,
+        flow_routes: &mut [(FlowRouteKey, FlowRouteValue)],
+    ) -> Result<()> {
+        let keys = flow_routes
+            .iter()
+            .map(|(_, value)| NodeAddressKey::with_flownode(value.peer.id))
+            .collect();
+        let flow_node_addrs =
+            flownode_addr_helper::get_flownode_addresses(&self.kv_backend, keys).await?;
+        for (_, flow_route_value) in flow_routes.iter_mut() {
+            let flownode_id = flow_route_value.peer.id;
+            // If an id lacks a corresponding address in the `flow_node_addrs`,
+            // it means the old address in `flow_route_value` is still valid,
+            // which is expected.
+            if let Some(node_addr) = flow_node_addrs.get(&flownode_id) {
+                flow_route_value.peer.addr = node_addr.peer.addr.clone();
+            }
+        }
+        Ok(())
+    }
 }

 #[cfg(test)]
--- a/src/common/meta/src/key/flow/flownode_addr_helper.rs
+++ b/src/common/meta/src/key/flow/flownode_addr_helper.rs
@@ -0,0 +1,47 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use crate::error::Result;
+use crate::key::node_address::{NodeAddressKey, NodeAddressValue};
+use crate::key::{MetadataKey, MetadataValue};
+use crate::kv_backend::KvBackendRef;
+use crate::rpc::store::BatchGetRequest;
+
+/// Batch-gets the addresses of the flownodes identified by `keys` from `kv_backend`.
+/// The result is a map: node_id -> NodeAddressValue, built from the returned key-value pairs.
+pub(crate) async fn get_flownode_addresses(
+    kv_backend: &KvBackendRef,
+    keys: Vec<NodeAddressKey>,
+) -> Result<HashMap<u64, NodeAddressValue>> {
+    if keys.is_empty() {
+        return Ok(HashMap::default()); // nothing to look up, so the backend is not queried
+    }
+
+    let req = BatchGetRequest {
+        keys: keys.into_iter().map(|k| k.to_bytes()).collect(),
+    };
+    kv_backend
+        .batch_get(req)
+        .await?
+        .kvs
+        .into_iter()
+        .map(|kv| {
+            let key = NodeAddressKey::from_bytes(&kv.key)?;
+            let value = NodeAddressValue::try_from_raw_value(&kv.value)?;
+            Ok((key.node_id, value))
+        })
+        .collect() // stops at the first key or value that fails to decode
+}
--- a/src/common/meta/src/key/flow/table_flow.rs
+++ b/src/common/meta/src/key/flow/table_flow.rs
@@ -14,7 +14,7 @@

 use std::sync::Arc;

-use futures::stream::BoxStream;
+use futures::TryStreamExt;
 use lazy_static::lazy_static;
 use regex::Regex;
 use serde::{Deserialize, Serialize};
@@ -22,7 +22,8 @@ use snafu::OptionExt;
 use table::metadata::TableId;

 use crate::error::{self, Result};
-use crate::key::flow::FlowScoped;
+use crate::key::flow::{flownode_addr_helper, FlowScoped};
+use crate::key::node_address::NodeAddressKey;
 use crate::key::{BytesAdapter, FlowId, FlowPartitionId, MetadataKey, MetadataValue};
 use crate::kv_backend::txn::{Txn, TxnOp};
 use crate::kv_backend::KvBackendRef;
@@ -196,10 +197,7 @@ impl TableFlowManager {
    /// Retrieves all [TableFlowKey]s of the specified `table_id`.
    ///
    /// TODO(discord9): add cache for it since range request does not support cache.
-    pub fn flows(
-        &self,
-        table_id: TableId,
-    ) -> BoxStream<'static, Result<(TableFlowKey, TableFlowValue)>> {
+    pub async fn flows(&self, table_id: TableId) -> Result<Vec<(TableFlowKey, TableFlowValue)>> {
        let start_key = TableFlowKey::range_start_key(table_id);
        let req = RangeRequest::new().with_prefix(start_key);
        let stream = PaginationStream::new(
@@ -210,7 +208,9 @@ impl TableFlowManager {
        )
        .into_stream();

-        Box::pin(stream)
+        let mut res = stream.try_collect::<Vec<_>>().await?;
+        self.remap_table_flow_addresses(&mut res).await?;
+        Ok(res)
    }

    /// Builds a create table flow transaction.
@@ -238,6 +238,28 @@ impl TableFlowManager {

        Ok(Txn::new().and_then(txns))
    }
+
+    async fn remap_table_flow_addresses(
+        &self,
+        table_flows: &mut [(TableFlowKey, TableFlowValue)],
+    ) -> Result<()> {
+        let keys = table_flows
+            .iter()
+            .map(|(_, value)| NodeAddressKey::with_flownode(value.peer.id))
+            .collect::<Vec<_>>();
+        let flownode_addrs =
+            flownode_addr_helper::get_flownode_addresses(&self.kv_backend, keys).await?;
+        for (_, table_flow_value) in table_flows.iter_mut() {
+            let flownode_id = table_flow_value.peer.id;
+            // If an id lacks a corresponding address in the `flownode_addrs`,
+            // it means the old address in `table_flow_value` is still valid,
+            // which is expected.
+            if let Some(flownode_addr) = flownode_addrs.get(&flownode_id) {
+                table_flow_value.peer.addr = flownode_addr.peer.addr.clone();
+            }
+        }
+        Ok(())
+    }
 }

 #[cfg(test)]
--- a/src/common/meta/src/key/node_address.rs
+++ b/src/common/meta/src/key/node_address.rs
@@ -39,6 +39,10 @@ impl NodeAddressKey {
    pub fn with_datanode(node_id: u64) -> Self {
        Self::new(Role::Datanode, node_id)
    }
+
+    pub fn with_flownode(node_id: u64) -> Self {
+        Self::new(Role::Flownode, node_id)
+    }
 }

 #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
--- a/src/common/meta/src/kv_backend.rs
+++ b/src/common/meta/src/kv_backend.rs
@@ -32,7 +32,7 @@ pub mod chroot;
 pub mod etcd;
 pub mod memory;
 #[cfg(feature = "pg_kvbackend")]
-pub mod postgres;
+pub mod rds;
 pub mod test;
 pub mod txn;

--- a/src/common/meta/src/kv_backend/postgres.rs
+++ b/src/common/meta/src/kv_backend/postgres.rs
--- a/src/common/meta/src/kv_backend/rds.rs
+++ b/src/common/meta/src/kv_backend/rds.rs
@@ -0,0 +1,548 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::any::Any;
+use std::collections::HashMap;
+use std::marker::PhantomData;
+use std::time::Duration;
+
+use backon::{BackoffBuilder, ExponentialBuilder};
+use common_telemetry::debug;
+
+use crate::error::{Error, RdsTransactionRetryFailedSnafu, Result};
+use crate::kv_backend::txn::{
+    Compare, Txn as KvTxn, TxnOp, TxnOpResponse, TxnResponse as KvTxnResponse,
+};
+use crate::kv_backend::{KvBackend, TxnService};
+use crate::metrics::METRIC_META_TXN_REQUEST;
+use crate::rpc::store::{
+    BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
+    BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, PutRequest, PutResponse,
+    RangeRequest, RangeResponse,
+};
+use crate::rpc::KeyValue;
+
+mod postgres;
+
+pub use postgres::PgStore;
+
+const RDS_STORE_TXN_RETRY_COUNT: usize = 3;
+
+/// Query executor for rds. It can execute queries or generate a transaction executor.
+#[async_trait::async_trait]
+pub trait Executor: Send + Sync {
+    type Transaction<'a>: 'a + Transaction<'a>
+    where
+        Self: 'a;
+
+    fn name() -> &'static str;
+
+    async fn query(&mut self, query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>>;
+
+    /// For queries whose rows are not needed (e.g. `DELETE`); by default runs `query` and drops them.
+    async fn execute(&mut self, query: &str, params: &[&Vec<u8>]) -> Result<()> {
+        self.query(query, params).await?;
+        Ok(())
+    }
+
+    async fn txn_executor<'a>(&'a mut self) -> Result<Self::Transaction<'a>>;
+}
+
+/// Transaction query executor for rds. It can execute queries within a transaction or commit it.
+#[async_trait::async_trait]
+pub trait Transaction<'a>: Send + Sync {
+    async fn query(&mut self, query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>>;
+
+    async fn execute(&mut self, query: &str, params: &[&Vec<u8>]) -> Result<()> {
+        self.query(query, params).await?; // default: run `query` and discard the rows
+        Ok(())
+    }
+
+    async fn commit(self) -> Result<()>;
+}
+
+/// Factory for creating default and transaction query executors.
+#[async_trait::async_trait]
+pub trait ExecutorFactory<T: Executor>: Send + Sync {
+    async fn default_executor(&self) -> Result<T>;
+
+    async fn txn_executor<'a>(&self, default_executor: &'a mut T) -> Result<T::Transaction<'a>>;
+}
+
+/// Rds backed store for metasrv
+pub struct RdsStore<T, S, R>
+where
+    T: Executor + Send + Sync,
+    S: ExecutorFactory<T> + Send + Sync,
+{
+    max_txn_ops: usize,
+    txn_retry_count: usize,
+    executor_factory: S,
+    sql_template_set: R,
+    _phantom: PhantomData<T>,
+}
+
+pub enum ExecutorImpl<'a, T: Executor + 'a> {
+    Default(T), // queries go straight to the executor
+    Txn(T::Transaction<'a>), // queries go through the executor's transaction type
+}
+
+impl<T: Executor> ExecutorImpl<'_, T> {
+    async fn query(&mut self, query: &str, params: &Vec<&Vec<u8>>) -> Result<Vec<KeyValue>> {
+        match self {
+            Self::Default(executor) => executor.query(query, params).await,
+            Self::Txn(executor) => executor.query(query, params).await,
+        }
+    }
+
+    async fn commit(self) -> Result<()> {
+        match self {
+            Self::Txn(executor) => executor.commit().await,
+            _ => Ok(()), // the default executor has no transaction to commit
+        }
+    }
+}
+
+#[async_trait::async_trait]
+pub trait KvQueryExecutor<T: Executor> {
+    async fn range_with_query_executor(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, T>,
+        req: RangeRequest,
+    ) -> Result<RangeResponse>;
+
+    async fn put_with_query_executor(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, T>,
+        req: PutRequest,
+    ) -> Result<PutResponse> {
+        let kv = KeyValue {
+            key: req.key,
+            value: req.value,
+        };
+        let mut res = self
+            .batch_put_with_query_executor(
+                query_executor,
+                BatchPutRequest {
+                    kvs: vec![kv], // a single put is issued as a one-element batch put
+                    prev_kv: req.prev_kv,
+                },
+            )
+            .await?;
+
+        if !res.prev_kvs.is_empty() {
+            debug_assert!(req.prev_kv);
+            return Ok(PutResponse {
+                prev_kv: Some(res.prev_kvs.remove(0)),
+            });
+        }
+        Ok(PutResponse::default())
+    }
+
+    async fn batch_put_with_query_executor(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, T>,
+        req: BatchPutRequest,
+    ) -> Result<BatchPutResponse>;
+
+    /// Batch get with the given query executor, so the read can go through a transaction executor.
+    async fn batch_get_with_query_executor(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, T>,
+        req: BatchGetRequest,
+    ) -> Result<BatchGetResponse>;
+
+    async fn delete_range_with_query_executor(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, T>,
+        req: DeleteRangeRequest,
+    ) -> Result<DeleteRangeResponse>;
+
+    async fn batch_delete_with_query_executor(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, T>,
+        req: BatchDeleteRequest,
+    ) -> Result<BatchDeleteResponse>;
+}
+
+impl<T, S, R> RdsStore<T, S, R>
+where
+    Self: KvQueryExecutor<T> + Send + Sync,
+    T: Executor + Send + Sync,
+    S: ExecutorFactory<T> + Send + Sync,
+{
+    async fn execute_txn_cmp(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, T>,
+        cmp: &[Compare],
+    ) -> Result<bool> {
+        let batch_get_req = BatchGetRequest {
+            keys: cmp.iter().map(|c| c.key.clone()).collect(), // one batch get covers every compared key
+        };
+        let res = self
+            .batch_get_with_query_executor(query_executor, batch_get_req)
+            .await?;
+        debug!("batch get res: {:?}", res);
+        let res_map = res
+            .kvs
+            .into_iter()
+            .map(|kv| (kv.key, kv.value))
+            .collect::<HashMap<Vec<u8>, Vec<u8>>>();
+        for c in cmp {
+            let value = res_map.get(&c.key); // `None` when the key was not returned
+            if !c.compare_value(value) {
+                return Ok(false); // the first failed comparison settles the result
+            }
+        }
+        Ok(true)
+    }
+
+    /// Execute a batch of transaction operations. This function is only used for transactions with the same operation type.
+    async fn try_batch_txn(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, T>,
+        txn_ops: &[TxnOp],
+    ) -> Result<Option<Vec<TxnOpResponse>>> {
+        if !check_txn_ops(txn_ops)? {
+            return Ok(None); // not batchable; `txn_inner` then runs the ops one by one
+        }
+        // Safety: txn_ops is not empty (presumably ensured by `check_txn_ops` above; confirm)
+        match txn_ops.first().unwrap() {
+            TxnOp::Delete(_) => self.handle_batch_delete(query_executor, txn_ops).await,
+            TxnOp::Put(_, _) => self.handle_batch_put(query_executor, txn_ops).await,
+            TxnOp::Get(_) => self.handle_batch_get(query_executor, txn_ops).await,
+        }
+    }
+
+    async fn handle_batch_delete(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, T>,
+        txn_ops: &[TxnOp],
+    ) -> Result<Option<Vec<TxnOpResponse>>> {
+        let mut batch_del_req = BatchDeleteRequest {
+            keys: vec![],
+            prev_kv: true, // previous values are requested to learn which keys existed
+        };
+        for op in txn_ops {
+            if let TxnOp::Delete(key) = op {
+                batch_del_req.keys.push(key.clone());
+            }
+        }
+        let res = self
+            .batch_delete_with_query_executor(query_executor, batch_del_req)
+            .await?;
+        let res_map = res
+            .prev_kvs
+            .into_iter()
+            .map(|kv| (kv.key, kv.value))
+            .collect::<HashMap<Vec<u8>, Vec<u8>>>();
+        let mut resps = Vec::with_capacity(txn_ops.len());
+        for op in txn_ops {
+            if let TxnOp::Delete(key) = op {
+                let value = res_map.get(key);
+                resps.push(TxnOpResponse::ResponseDelete(DeleteRangeResponse {
+                    deleted: if value.is_some() { 1 } else { 0 },
+                    prev_kvs: vec![], // previous pairs are not echoed back in the response
+                }));
+            }
+        }
+        Ok(Some(resps))
+    }
+
+    async fn handle_batch_put(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, T>,
+        txn_ops: &[TxnOp],
+    ) -> Result<Option<Vec<TxnOpResponse>>> {
+        let mut batch_put_req = BatchPutRequest {
+            kvs: vec![],
+            prev_kv: false, // previous values are not requested
+        };
+        for op in txn_ops {
+            if let TxnOp::Put(key, value) = op {
+                batch_put_req.kvs.push(KeyValue {
+                    key: key.clone(),
+                    value: value.clone(),
+                });
+            }
+        }
+        let _ = self
+            .batch_put_with_query_executor(query_executor, batch_put_req)
+            .await?; // only failure matters here; the batch response is discarded
+        let mut resps = Vec::with_capacity(txn_ops.len());
+        for op in txn_ops {
+            if let TxnOp::Put(_, _) = op {
+                resps.push(TxnOpResponse::ResponsePut(PutResponse { prev_kv: None }));
+            }
+        }
+        Ok(Some(resps))
+    }
+
+    async fn handle_batch_get(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, T>,
+        txn_ops: &[TxnOp],
+    ) -> Result<Option<Vec<TxnOpResponse>>> {
+        let mut batch_get_req = BatchGetRequest { keys: vec![] };
+        for op in txn_ops {
+            if let TxnOp::Get(key) = op {
+                batch_get_req.keys.push(key.clone());
+            }
+        }
+        let res = self
+            .batch_get_with_query_executor(query_executor, batch_get_req)
+            .await?;
+        let res_map = res
+            .kvs
+            .into_iter()
+            .map(|kv| (kv.key, kv.value))
+            .collect::<HashMap<Vec<u8>, Vec<u8>>>();
+        let mut resps = Vec::with_capacity(txn_ops.len());
+        for op in txn_ops {
+            if let TxnOp::Get(key) = op {
+                let value = res_map.get(key);
+                resps.push(TxnOpResponse::ResponseGet(RangeResponse {
+                    kvs: value
+                        .map(|v| {
+                            vec![KeyValue {
+                                key: key.clone(),
+                                value: v.clone(),
+                            }]
+                        })
+                        .unwrap_or_default(), // a key missing from the result yields an empty `kvs`
+                    more: false,
+                }));
+            }
+        }
+        Ok(Some(resps))
+    }
+
+    async fn execute_txn_op(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, T>,
+        op: &TxnOp,
+    ) -> Result<TxnOpResponse> {
+        match op {
+            TxnOp::Put(key, value) => {
+                let res = self
+                    .put_with_query_executor(
+                        query_executor,
+                        PutRequest {
+                            key: key.clone(),
+                            value: value.clone(),
+                            prev_kv: false, // the previous value is not requested
+                        },
+                    )
+                    .await?;
+                Ok(TxnOpResponse::ResponsePut(res))
+            }
+            TxnOp::Get(key) => {
+                let res = self
+                    .range_with_query_executor(
+                        query_executor,
+                        RangeRequest {
+                            key: key.clone(),
+                            range_end: vec![], // presumably an empty end means a single-key lookup; confirm
+                            limit: 1,
+                            keys_only: false,
+                        },
+                    )
+                    .await?;
+                Ok(TxnOpResponse::ResponseGet(res))
+            }
+            TxnOp::Delete(key) => {
+                let res = self
+                    .delete_range_with_query_executor(
+                        query_executor,
+                        DeleteRangeRequest {
+                            key: key.clone(),
+                            range_end: vec![],
+                            prev_kv: false, // the previous pair is not requested
+                        },
+                    )
+                    .await?;
+                Ok(TxnOpResponse::ResponseDelete(res))
+            }
+        }
+    }
+
+    /// Runs `txn` once inside a transaction started on a fresh default executor.
+    ///
+    /// When `txn.c_when` is set the compare list is evaluated first. Depending on the
+    /// outcome and on the `c_then` / `c_else` flags, either the success or the failure
+    /// operations are executed: in one batch when `try_batch_txn` yields a result,
+    /// otherwise one operation at a time. The transaction is committed before the
+    /// response is returned.
+    async fn txn_inner(&self, txn: &KvTxn) -> Result<KvTxnResponse> {
+        let mut default_executor = self.executor_factory.default_executor().await?;
+        let txn_client = self
+            .executor_factory
+            .txn_executor(&mut default_executor)
+            .await?;
+        let mut txn_executor = ExecutorImpl::Txn(txn_client);
+
+        // Without a `when` clause the transaction counts as succeeded.
+        let succeeded = if txn.c_when {
+            self.execute_txn_cmp(&mut txn_executor, &txn.req.compare)
+                .await?
+        } else {
+            true
+        };
+
+        // Pick the operation list to run, if any.
+        let selected_ops = if succeeded && txn.c_then {
+            Some(&txn.req.success)
+        } else if !succeeded && txn.c_else {
+            Some(&txn.req.failure)
+        } else {
+            None
+        };
+
+        let mut responses = vec![];
+        if let Some(ops) = selected_ops {
+            match self.try_batch_txn(&mut txn_executor, ops).await? {
+                Some(batched) => responses.extend(batched),
+                None => {
+                    for txnop in ops {
+                        let res = self.execute_txn_op(&mut txn_executor, txnop).await?;
+                        responses.push(res);
+                    }
+                }
+            }
+        }
+
+        txn_executor.commit().await?;
+        Ok(KvTxnResponse {
+            responses,
+            succeeded,
+        })
+    }
+}
+
+/// [`KvBackend`] operations for [`RdsStore`]: every call obtains a default executor from
+/// the factory and forwards the request to the matching `*_with_query_executor` method.
+#[async_trait::async_trait]
+impl<T, S, R> KvBackend for RdsStore<T, S, R>
+where
+    R: 'static,
+    Self: KvQueryExecutor<T> + Send + Sync,
+    T: Executor + 'static,
+    S: ExecutorFactory<T> + 'static,
+{
+    /// Returns the name reported by the executor type `T`.
+    fn name(&self) -> &str {
+        T::name()
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
+        let mut executor = ExecutorImpl::Default(self.executor_factory.default_executor().await?);
+        self.range_with_query_executor(&mut executor, req).await
+    }
+
+    async fn put(&self, req: PutRequest) -> Result<PutResponse> {
+        let mut executor = ExecutorImpl::Default(self.executor_factory.default_executor().await?);
+        self.put_with_query_executor(&mut executor, req).await
+    }
+
+    async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
+        let mut executor = ExecutorImpl::Default(self.executor_factory.default_executor().await?);
+        self.batch_put_with_query_executor(&mut executor, req).await
+    }
+
+    async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
+        let mut executor = ExecutorImpl::Default(self.executor_factory.default_executor().await?);
+        self.batch_get_with_query_executor(&mut executor, req).await
+    }
+
+    async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
+        let mut executor = ExecutorImpl::Default(self.executor_factory.default_executor().await?);
+        self.delete_range_with_query_executor(&mut executor, req)
+            .await
+    }
+
+    async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
+        let mut executor = ExecutorImpl::Default(self.executor_factory.default_executor().await?);
+        self.batch_delete_with_query_executor(&mut executor, req)
+            .await
+    }
+}
+
+/// [`TxnService`] for [`RdsStore`]: runs a transaction and retries it with exponential
+/// backoff when it fails with a serialization error.
+#[async_trait::async_trait]
+impl<T, S, R> TxnService for RdsStore<T, S, R>
+where
+    Self: KvQueryExecutor<T> + Send + Sync,
+    T: Executor + 'static,
+    S: ExecutorFactory<T> + 'static,
+{
+    type Error = Error;
+
+    /// Executes `txn`, retrying on serialization errors.
+    ///
+    /// Retries are spaced by an exponential backoff between 10ms and 200ms and limited to
+    /// `txn_retry_count` attempts. Any other error is returned immediately; once the
+    /// backoff is exhausted a retry-failed error is returned.
+    async fn txn(&self, txn: KvTxn) -> Result<KvTxnResponse> {
+        let _timer = METRIC_META_TXN_REQUEST
+            .with_label_values(&[T::name(), "txn"])
+            .start_timer();
+
+        let mut backoff = ExponentialBuilder::default()
+            .with_min_delay(Duration::from_millis(10))
+            .with_max_delay(Duration::from_millis(200))
+            .with_max_times(self.txn_retry_count)
+            .build();
+
+        loop {
+            let err = match self.txn_inner(&txn).await {
+                Ok(res) => return Ok(res),
+                Err(e) => e,
+            };
+            // Only serialization failures are retried.
+            if !err.is_serialization_error() {
+                return Err(err);
+            }
+            match backoff.next() {
+                Some(delay) => tokio::time::sleep(delay).await,
+                None => return RdsTransactionRetryFailedSnafu {}.fail(),
+            }
+        }
+    }
+
+    /// Maximum number of operations allowed in a single transaction.
+    fn max_txn_ops(&self) -> usize {
+        self.max_txn_ops
+    }
+}
+
+/// Checks if the transaction operations are the same type.
+///
+/// Returns `Ok(false)` for an empty slice and `Ok(true)` when every operation is the same
+/// variant (all puts, all gets or all deletes) as the first one.
+fn check_txn_ops(txn_ops: &[TxnOp]) -> Result<bool> {
+    let Some((first, rest)) = txn_ops.split_first() else {
+        return Ok(false);
+    };
+    let same = rest.iter().all(|op| {
+        matches!(
+            (first, op),
+            (TxnOp::Put(_, _), TxnOp::Put(_, _))
+                | (TxnOp::Get(_), TxnOp::Get(_))
+                | (TxnOp::Delete(_), TxnOp::Delete(_))
+        )
+    });
+    Ok(same)
+}
--- a/src/common/meta/src/kv_backend/rds/postgres.rs
+++ b/src/common/meta/src/kv_backend/rds/postgres.rs
@@ -0,0 +1,624 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::marker::PhantomData;
+use std::sync::Arc;
+
+use common_telemetry::debug;
+use deadpool_postgres::{Config, Pool, Runtime};
+use snafu::ResultExt;
+use tokio_postgres::types::ToSql;
+use tokio_postgres::{IsolationLevel, NoTls, Row};
+
+use crate::error::{
+    CreatePostgresPoolSnafu, GetPostgresConnectionSnafu, PostgresExecutionSnafu,
+    PostgresTransactionSnafu, Result,
+};
+use crate::kv_backend::rds::{
+    Executor, ExecutorFactory, ExecutorImpl, KvQueryExecutor, RdsStore, Transaction,
+    RDS_STORE_TXN_RETRY_COUNT,
+};
+use crate::kv_backend::KvBackendRef;
+use crate::rpc::store::{
+    BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
+    BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, RangeRequest, RangeResponse,
+};
+use crate::rpc::KeyValue;
+
+/// Newtype around a connection object managed by a `deadpool_postgres` pool.
+pub struct PgClient(deadpool::managed::Object<deadpool_postgres::Manager>);
+/// Newtype around an open [`deadpool_postgres::Transaction`].
+pub struct PgTxnClient<'a>(deadpool_postgres::Transaction<'a>);
+
+/// Builds a [`KeyValue`] from a row, taking the key from column 0 and the value from
+/// column 1.
+fn key_value_from_row(r: Row) -> KeyValue {
+    let key = r.get(0);
+    let value = r.get(1);
+    KeyValue { key, value }
+}
+
+/// A single zero byte. `range_template` maps a `range_end` equal to this value to an
+/// open-ended scan (`LeftBounded`, or `Full` when the key equals it as well).
+const EMPTY: &[u8] = &[0];
+
+/// Type of range template.
+#[derive(Debug, Clone, Copy)]
+enum RangeTemplateType {
+    /// Single key; binds the key only.
+    Point,
+    /// Bounded on both sides; binds the key and the range end.
+    Range,
+    /// No bounds; binds nothing.
+    Full,
+    /// Bounded on the left only; binds the key.
+    LeftBounded,
+    /// Prefix match; binds the key with a trailing `%` appended.
+    Prefix,
+}
+
+impl RangeTemplateType {
+    /// Builds params for the given range template type.
+    ///
+    /// The returned vector holds the values to bind, in placeholder order.
+    fn build_params(&self, mut key: Vec<u8>, range_end: Vec<u8>) -> Vec<Vec<u8>> {
+        match self {
+            RangeTemplateType::Full => Vec::new(),
+            RangeTemplateType::Range => vec![key, range_end],
+            RangeTemplateType::Point | RangeTemplateType::LeftBounded => vec![key],
+            RangeTemplateType::Prefix => {
+                // Turn the key into a pattern by appending the `%` wildcard.
+                key.push(b'%');
+                vec![key]
+            }
+        }
+    }
+}
+
+/// Templates for range request.
+///
+/// Holds one SQL text per [`RangeTemplateType`] variant.
+#[derive(Debug, Clone)]
+struct RangeTemplate {
+    point: String,
+    range: String,
+    full: String,
+    left_bounded: String,
+    prefix: String,
+}
+
+impl RangeTemplate {
+    /// Gets the template for the given type.
+    fn get(&self, typ: RangeTemplateType) -> &str {
+        let selected = match typ {
+            RangeTemplateType::Point => &self.point,
+            RangeTemplateType::Range => &self.range,
+            RangeTemplateType::Full => &self.full,
+            RangeTemplateType::LeftBounded => &self.left_bounded,
+            RangeTemplateType::Prefix => &self.prefix,
+        };
+        selected.as_str()
+    }
+
+    /// Adds limit to the template.
+    ///
+    /// A `limit` of `0` means "no limit": only the terminating `;` is appended.
+    fn with_limit(template: &str, limit: i64) -> String {
+        match limit {
+            0 => format!("{};", template),
+            n => format!("{} LIMIT {};", template, n),
+        }
+    }
+}
+
+/// Returns `true` when `end` equals `start` with its last byte incremented by one, i.e.
+/// when `[start, end)` spans all keys that have `start` as a prefix.
+///
+/// Returns `false` for slices of different lengths, for empty slices, and when the last
+/// byte of `start` is `0xff` (incrementing it would overflow).
+fn is_prefix_range(start: &[u8], end: &[u8]) -> bool {
+    if start.len() != end.len() {
+        return false;
+    }
+    match (start.split_last(), end.split_last()) {
+        (Some((start_last, start_head)), Some((end_last, end_head))) => {
+            // `checked_add` avoids the overflow panic / wrap-around on a trailing 0xff.
+            start_head == end_head && start_last.checked_add(1) == Some(*end_last)
+        }
+        // Both slices are empty: there is no last byte to compare.
+        _ => false,
+    }
+}
+
+/// Determine the template type for range request.
+///
+/// - empty `range_end` selects `Point`;
+/// - `range_end == EMPTY` selects `Full` when `key == EMPTY`, otherwise `LeftBounded`;
+/// - anything else selects `Prefix` when `is_prefix_range` holds, otherwise `Range`.
+fn range_template(key: &[u8], range_end: &[u8]) -> RangeTemplateType {
+    if range_end.is_empty() {
+        return RangeTemplateType::Point;
+    }
+    if range_end == EMPTY {
+        return if key == EMPTY {
+            RangeTemplateType::Full
+        } else {
+            RangeTemplateType::LeftBounded
+        };
+    }
+    if is_prefix_range(key, range_end) {
+        RangeTemplateType::Prefix
+    } else {
+        RangeTemplateType::Range
+    }
+}
+
+/// Generate in placeholders for PostgreSQL.
+///
+/// Produces `$from`, `$from+1`, ..., `$to` (both ends inclusive); the result is empty
+/// when `from > to`.
+fn pg_generate_in_placeholders(from: usize, to: usize) -> Vec<String> {
+    let mut placeholders = Vec::new();
+    for index in from..=to {
+        placeholders.push(format!("${}", index));
+    }
+    placeholders
+}
+
+/// Factory for building sql templates.
+struct PgSqlTemplateFactory<'a> {
+    table_name: &'a str,
+}
+
+impl<'a> PgSqlTemplateFactory<'a> {
+    /// Creates a new [`PgSqlTemplateFactory`] with the given table name.
+    fn new(table_name: &'a str) -> Self {
+        Self { table_name }
+    }
+
+    /// Builds the template set for the given table name.
+    ///
+    /// The table name is interpolated into the SQL text as-is, while keys and values are
+    /// bound through positional placeholders (`$1`, `$2`, ...).
+    fn build(&self) -> PgSqlTemplateSet {
+        let table_name = self.table_name;
+        PgSqlTemplateSet {
+            table_name: table_name.to_string(),
+            create_table_statement: format!(
+                "CREATE TABLE IF NOT EXISTS {table_name}(k bytea PRIMARY KEY, v bytea)",
+            ),
+            range_template: RangeTemplate {
+                point: format!("SELECT k, v FROM {table_name} WHERE k = $1"),
+                range: format!("SELECT k, v FROM {table_name} WHERE k >= $1 AND k < $2 ORDER BY k"),
+                // A full scan binds no parameters, so the statement must not contain a
+                // placeholder.
+                full: format!("SELECT k, v FROM {table_name} ORDER BY k"),
+                left_bounded: format!("SELECT k, v FROM {table_name} WHERE k >= $1 ORDER BY k"),
+                prefix: format!("SELECT k, v FROM {table_name} WHERE k LIKE $1 ORDER BY k"),
+            },
+            delete_template: RangeTemplate {
+                point: format!("DELETE FROM {table_name} WHERE k = $1 RETURNING k,v;"),
+                range: format!("DELETE FROM {table_name} WHERE k >= $1 AND k < $2 RETURNING k,v;"),
+                full: format!("DELETE FROM {table_name} RETURNING k,v"),
+                left_bounded: format!("DELETE FROM {table_name} WHERE k >= $1 RETURNING k,v;"),
+                prefix: format!("DELETE FROM {table_name} WHERE k LIKE $1 RETURNING k,v;"),
+            },
+        }
+    }
+}
+
+/// Templates for the given table name.
+#[derive(Debug, Clone)]
+pub struct PgSqlTemplateSet {
+    table_name: String,
+    create_table_statement: String,
+    range_template: RangeTemplate,
+    delete_template: RangeTemplate,
+}
+
+impl PgSqlTemplateSet {
+    /// Generates the sql for batch get.
+    ///
+    /// `key_len` is the number of keys; one placeholder (`$1..=$key_len`) is emitted per key.
+    fn generate_batch_get_query(&self, key_len: usize) -> String {
+        let placeholders = pg_generate_in_placeholders(1, key_len);
+        format!(
+            "SELECT k, v FROM {} WHERE k in ({});",
+            self.table_name,
+            placeholders.join(", ")
+        )
+    }
+
+    /// Generates the sql for batch delete.
+    ///
+    /// `key_len` is the number of keys; the deleted rows are returned by the statement.
+    fn generate_batch_delete_query(&self, key_len: usize) -> String {
+        let placeholders = pg_generate_in_placeholders(1, key_len);
+        format!(
+            "DELETE FROM {} WHERE k in ({}) RETURNING k,v;",
+            self.table_name,
+            placeholders.join(", ")
+        )
+    }
+
+    /// Generates the sql for batch upsert.
+    ///
+    /// Placeholders `$1..=$kv_len` select the previous rows for the keys; the following
+    /// `2 * kv_len` placeholders are consumed pairwise as `(key, value)` tuples to insert.
+    fn generate_batch_upsert_query(&self, kv_len: usize) -> String {
+        let table_name = &self.table_name;
+        let in_clause = pg_generate_in_placeholders(1, kv_len).join(", ");
+        let values_clause = (0..kv_len)
+            .map(|pair| {
+                // Each pair takes two consecutive placeholders after the first `kv_len`.
+                let key_index = kv_len + 1 + 2 * pair;
+                format!("(${0}, ${1})", key_index, key_index + 1)
+            })
+            .collect::<Vec<_>>()
+            .join(", ");
+
+        format!(
+            r#"
+    WITH prev AS (
+        SELECT k,v FROM {table_name} WHERE k IN ({in_clause})
+    ), update AS (
+    INSERT INTO {table_name} (k, v) VALUES
+        {values_clause}
+    ON CONFLICT (
+        k
+    ) DO UPDATE SET
+        v = excluded.v
+    )
+
+    SELECT k, v FROM prev;
+    "#
+        )
+    }
+}
+
+/// [`Executor`] implementation that runs statements directly on the wrapped connection.
+#[async_trait::async_trait]
+impl Executor for PgClient {
+    type Transaction<'a>
+        = PgTxnClient<'a>
+    where
+        Self: 'a;
+
+    /// Name of this executor type.
+    fn name() -> &'static str {
+        "Postgres"
+    }
+
+    /// Prepares `query` with `prepare_cached`, executes it with `params` bound as
+    /// positional arguments, and converts every returned row into a [`KeyValue`].
+    ///
+    /// Both preparation and execution failures are reported with the SQL text attached.
+    async fn query(&mut self, query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>> {
+        // Erase the concrete parameter type into the `ToSql` trait objects the client takes.
+        let params: Vec<&(dyn ToSql + Sync)> = params.iter().map(|p| p as _).collect();
+        let stmt = self
+            .0
+            .prepare_cached(query)
+            .await
+            .context(PostgresExecutionSnafu { sql: query })?;
+        let rows = self
+            .0
+            .query(&stmt, &params)
+            .await
+            .context(PostgresExecutionSnafu { sql: query })?;
+        Ok(rows.into_iter().map(key_value_from_row).collect())
+    }
+
+    /// Starts a transaction with `Serializable` isolation on this connection.
+    async fn txn_executor<'a>(&'a mut self) -> Result<Self::Transaction<'a>> {
+        let txn = self
+            .0
+            .build_transaction()
+            .isolation_level(IsolationLevel::Serializable)
+            .start()
+            .await
+            .context(PostgresTransactionSnafu {
+                operation: "begin".to_string(),
+            })?;
+        Ok(PgTxnClient(txn))
+    }
+}
+
+/// [`Transaction`] implementation that runs statements inside the wrapped transaction.
+#[async_trait::async_trait]
+impl<'a> Transaction<'a> for PgTxnClient<'a> {
+    /// Prepares `query` with `prepare_cached`, executes it with `params` bound as
+    /// positional arguments, and converts every returned row into a [`KeyValue`].
+    async fn query(&mut self, query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>> {
+        // Erase the concrete parameter type into the `ToSql` trait objects the client takes.
+        let params: Vec<&(dyn ToSql + Sync)> = params.iter().map(|p| p as _).collect();
+        let stmt = self
+            .0
+            .prepare_cached(query)
+            .await
+            .context(PostgresExecutionSnafu { sql: query })?;
+        let rows = self
+            .0
+            .query(&stmt, &params)
+            .await
+            .context(PostgresExecutionSnafu { sql: query })?;
+        Ok(rows.into_iter().map(key_value_from_row).collect())
+    }
+
+    /// Commits the transaction, consuming this client.
+    async fn commit(self) -> Result<()> {
+        self.0.commit().await.context(PostgresTransactionSnafu {
+            operation: "commit",
+        })?;
+        Ok(())
+    }
+}
+
+/// Hands out [`PgClient`]s taken from a connection pool.
+pub struct PgExecutorFactory {
+    pool: Pool,
+}
+
+impl PgExecutorFactory {
+    /// Takes a connection from the pool and wraps it in a [`PgClient`].
+    ///
+    /// A pool error is converted into a connection error carrying its message as `reason`.
+    async fn client(&self) -> Result<PgClient> {
+        match self.pool.get().await {
+            Ok(client) => Ok(PgClient(client)),
+            Err(e) => GetPostgresConnectionSnafu {
+                reason: e.to_string(),
+            }
+            .fail(),
+        }
+    }
+}
+
+/// [`ExecutorFactory`] implementation producing [`PgClient`] executors.
+#[async_trait::async_trait]
+impl ExecutorFactory<PgClient> for PgExecutorFactory {
+    /// Returns a client backed by a connection from the pool.
+    async fn default_executor(&self) -> Result<PgClient> {
+        self.client().await
+    }
+
+    /// Starts a transaction on `default_executor`; the returned client borrows it for `'a`.
+    async fn txn_executor<'a>(
+        &self,
+        default_executor: &'a mut PgClient,
+    ) -> Result<PgTxnClient<'a>> {
+        default_executor.txn_executor().await
+    }
+}
+
+/// A PostgreSQL-backed key-value store for metasrv.
+/// It uses [deadpool_postgres::Pool] as the connection pool for [RdsStore].
+pub type PgStore = RdsStore<PgClient, PgExecutorFactory, PgSqlTemplateSet>;
+
+#[async_trait::async_trait]
+impl KvQueryExecutor<PgClient> for PgStore {
+    /// Runs a range query with the given executor.
+    ///
+    /// The statement is chosen from the range templates according to `req.key` and
+    /// `req.range_end`. When `req.limit` is non-zero one extra row is requested so that
+    /// `more` can be reported; that extra row is never returned. With `req.keys_only`
+    /// the values of the returned pairs are emptied.
+    async fn range_with_query_executor(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, PgClient>,
+        req: RangeRequest,
+    ) -> Result<RangeResponse> {
+        let template_type = range_template(&req.key, &req.range_end);
+        let template = self.sql_template_set.range_template.get(template_type);
+        let params = template_type.build_params(req.key, req.range_end);
+        let params_ref = params.iter().collect::<Vec<_>>();
+        // Always add 1 to limit to check if there is more data. `saturating_add` keeps
+        // an `i64::MAX` limit from overflowing.
+        let query = RangeTemplate::with_limit(
+            template,
+            if req.limit == 0 {
+                0
+            } else {
+                req.limit.saturating_add(1)
+            },
+        );
+        let limit = req.limit as usize;
+        debug!("query: {:?}, params: {:?}", query, params);
+        let mut kvs = query_executor.query(&query, &params_ref).await?;
+        if req.keys_only {
+            kvs.iter_mut().for_each(|kv| kv.value = vec![]);
+        }
+        // If limit is 0, we always return all data
+        if limit == 0 || kvs.len() <= limit {
+            return Ok(RangeResponse { kvs, more: false });
+        }
+        // More rows than the limit came back: drop the extra probe row and set more to true
+        let removed = kvs.pop();
+        debug_assert!(removed.is_some());
+        Ok(RangeResponse { kvs, more: true })
+    }
+
+    /// Upserts all pairs of `req` in one statement.
+    ///
+    /// The statement also selects the rows previously stored under the same keys; they are
+    /// returned only when `req.prev_kv` is set. An empty request returns the default
+    /// response without touching the database.
+    async fn batch_put_with_query_executor(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, PgClient>,
+        req: BatchPutRequest,
+    ) -> Result<BatchPutResponse> {
+        // With no pairs the generated SQL would contain an empty `IN ()` list and an empty
+        // `VALUES` clause, so answer directly like the other batch operations do.
+        if req.kvs.is_empty() {
+            return Ok(BatchPutResponse::default());
+        }
+        // Parameter order: all keys for the `IN` list first, then `(key, value)` pairs.
+        let mut in_params = Vec::with_capacity(req.kvs.len() * 3);
+        let mut values_params = Vec::with_capacity(req.kvs.len() * 2);
+
+        for kv in &req.kvs {
+            let processed_key = &kv.key;
+            in_params.push(processed_key);
+
+            let processed_value = &kv.value;
+            values_params.push(processed_key);
+            values_params.push(processed_value);
+        }
+        in_params.extend(values_params);
+        let params = in_params.iter().map(|x| x as _).collect::<Vec<_>>();
+        let query = self
+            .sql_template_set
+            .generate_batch_upsert_query(req.kvs.len());
+        let kvs = query_executor.query(&query, &params).await?;
+        if req.prev_kv {
+            Ok(BatchPutResponse { prev_kvs: kvs })
+        } else {
+            Ok(BatchPutResponse::default())
+        }
+    }
+
+    /// Batch get with certain client. It's needed for a client with transaction.
+    ///
+    /// An empty key list returns an empty response without querying.
+    async fn batch_get_with_query_executor(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, PgClient>,
+        req: BatchGetRequest,
+    ) -> Result<BatchGetResponse> {
+        if req.keys.is_empty() {
+            return Ok(BatchGetResponse { kvs: vec![] });
+        }
+        let query = self
+            .sql_template_set
+            .generate_batch_get_query(req.keys.len());
+        let params = req.keys.iter().map(|x| x as _).collect::<Vec<_>>();
+        let kvs = query_executor.query(&query, &params).await?;
+        Ok(BatchGetResponse { kvs })
+    }
+
+    /// Deletes the rows selected by `req.key` / `req.range_end`.
+    ///
+    /// The number of rows returned by the statement becomes the deleted count; the deleted
+    /// pairs are attached to the response only when `req.prev_kv` is set.
+    async fn delete_range_with_query_executor(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, PgClient>,
+        req: DeleteRangeRequest,
+    ) -> Result<DeleteRangeResponse> {
+        let template_type = range_template(&req.key, &req.range_end);
+        let template = self.sql_template_set.delete_template.get(template_type);
+        let params = template_type.build_params(req.key, req.range_end);
+        let params_ref = params.iter().map(|x| x as _).collect::<Vec<_>>();
+        let kvs = query_executor.query(template, &params_ref).await?;
+        let mut resp = DeleteRangeResponse::new(kvs.len() as i64);
+        if req.prev_kv {
+            resp.with_prev_kvs(kvs);
+        }
+        Ok(resp)
+    }
+
+    /// Deletes the given keys in one statement.
+    ///
+    /// An empty key list returns the default response without querying. The deleted pairs
+    /// are returned only when `req.prev_kv` is set.
+    async fn batch_delete_with_query_executor(
+        &self,
+        query_executor: &mut ExecutorImpl<'_, PgClient>,
+        req: BatchDeleteRequest,
+    ) -> Result<BatchDeleteResponse> {
+        if req.keys.is_empty() {
+            return Ok(BatchDeleteResponse::default());
+        }
+        let query = self
+            .sql_template_set
+            .generate_batch_delete_query(req.keys.len());
+        let params = req.keys.iter().map(|x| x as _).collect::<Vec<_>>();
+        let kvs = query_executor.query(&query, &params).await?;
+        if req.prev_kv {
+            Ok(BatchDeleteResponse { prev_kvs: kvs })
+        } else {
+            Ok(BatchDeleteResponse::default())
+        }
+    }
+}
+
+impl PgStore {
+    /// Create [PgStore] impl of [KvBackendRef] from url.
+    ///
+    /// Builds a connection pool (Tokio runtime, no TLS) from `url` and then delegates to
+    /// [`PgStore::with_pg_pool`].
+    pub async fn with_url(url: &str, table_name: &str, max_txn_ops: usize) -> Result<KvBackendRef> {
+        let mut cfg = Config::new();
+        cfg.url = Some(url.to_string());
+        // TODO(weny, CookiePie): add tls support
+        let pool = cfg
+            .create_pool(Some(Runtime::Tokio1), NoTls)
+            .context(CreatePostgresPoolSnafu)?;
+        Self::with_pg_pool(pool, table_name, max_txn_ops).await
+    }
+
+    /// Create [PgStore] impl of [KvBackendRef] from [deadpool_postgres::Pool].
+    ///
+    /// # Errors
+    ///
+    /// Fails when no connection can be obtained from `pool`, or when the
+    /// `CREATE TABLE IF NOT EXISTS` statement for `table_name` cannot be executed.
+    pub async fn with_pg_pool(
+        pool: Pool,
+        table_name: &str,
+        max_txn_ops: usize,
+    ) -> Result<KvBackendRef> {
+        // This step ensures the postgres metadata backend is ready to use.
+        // We run `CREATE TABLE IF NOT EXISTS` for `table_name`, so the table is created
+        // if it does not exist.
+        let client = match pool.get().await {
+            Ok(client) => client,
+            Err(e) => {
+                return GetPostgresConnectionSnafu {
+                    reason: e.to_string(),
+                }
+                .fail();
+            }
+        };
+        let template_factory = PgSqlTemplateFactory::new(table_name);
+        let sql_template_set = template_factory.build();
+        client
+            .execute(&sql_template_set.create_table_statement, &[])
+            .await
+            .with_context(|_| PostgresExecutionSnafu {
+                sql: sql_template_set.create_table_statement.to_string(),
+            })?;
+        Ok(Arc::new(Self {
+            max_txn_ops,
+            sql_template_set,
+            txn_retry_count: RDS_STORE_TXN_RETRY_COUNT,
+            executor_factory: PgExecutorFactory { pool },
+            _phantom: PhantomData,
+        }))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::kv_backend::test::{
+        prepare_kv_with_prefix, test_kv_batch_delete_with_prefix, test_kv_batch_get_with_prefix,
+        test_kv_compare_and_put_with_prefix, test_kv_delete_range_with_prefix,
+        test_kv_put_with_prefix, test_kv_range_2_with_prefix, test_kv_range_with_prefix,
+        test_txn_compare_equal, test_txn_compare_greater, test_txn_compare_less,
+        test_txn_compare_not_equal, test_txn_one_compare_op, text_txn_multi_compare_op,
+        unprepare_kv,
+    };
+
+    /// Builds a [`PgStore`] on `table_name` against the server named by the
+    /// `GT_POSTGRES_ENDPOINTS` environment variable.
+    ///
+    /// Returns `None` when the variable is unset or empty; callers skip their test in
+    /// that case instead of failing.
+    async fn build_pg_kv_backend(table_name: &str) -> Option<PgStore> {
+        let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap_or_default();
+        if endpoints.is_empty() {
+            return None;
+        }
+
+        let mut cfg = Config::new();
+        cfg.url = Some(endpoints);
+        let pool = cfg
+            .create_pool(Some(Runtime::Tokio1), NoTls)
+            .context(CreatePostgresPoolSnafu)
+            .unwrap();
+        let client = pool.get().await.unwrap();
+        let template_factory = PgSqlTemplateFactory::new(table_name);
+        let sql_templates = template_factory.build();
+        client
+            .execute(&sql_templates.create_table_statement, &[])
+            .await
+            .context(PostgresExecutionSnafu {
+                sql: sql_templates.create_table_statement.to_string(),
+            })
+            .unwrap();
+        Some(PgStore {
+            max_txn_ops: 128,
+            sql_template_set: sql_templates,
+            txn_retry_count: RDS_STORE_TXN_RETRY_COUNT,
+            executor_factory: PgExecutorFactory { pool },
+            _phantom: PhantomData,
+        })
+    }
+
+    #[tokio::test]
+    async fn test_pg_put() {
+        // Skip when no PostgreSQL endpoint is configured.
+        let Some(kv_backend) = build_pg_kv_backend("put_test").await else {
+            return;
+        };
+        let prefix = b"put/";
+        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
+        test_kv_put_with_prefix(&kv_backend, prefix.to_vec()).await;
+        unprepare_kv(&kv_backend, prefix).await;
+    }
+
+    #[tokio::test]
+    async fn test_pg_range() {
+        // Skip when no PostgreSQL endpoint is configured.
+        let Some(kv_backend) = build_pg_kv_backend("range_test").await else {
+            return;
+        };
+        let prefix = b"range/";
+        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
+        test_kv_range_with_prefix(&kv_backend, prefix.to_vec()).await;
+        unprepare_kv(&kv_backend, prefix).await;
+    }
+
+    #[tokio::test]
+    async fn test_pg_range_2() {
+        // Skip when no PostgreSQL endpoint is configured.
+        let Some(kv_backend) = build_pg_kv_backend("range2_test").await else {
+            return;
+        };
+        let prefix = b"range2/";
+        test_kv_range_2_with_prefix(&kv_backend, prefix.to_vec()).await;
+        unprepare_kv(&kv_backend, prefix).await;
+    }
+
+    #[tokio::test]
+    async fn test_pg_batch_get() {
+        // Skip when no PostgreSQL endpoint is configured.
+        let Some(kv_backend) = build_pg_kv_backend("batch_get_test").await else {
+            return;
+        };
+        let prefix = b"batch_get/";
+        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
+        test_kv_batch_get_with_prefix(&kv_backend, prefix.to_vec()).await;
+        unprepare_kv(&kv_backend, prefix).await;
+    }
+
+    #[tokio::test]
+    async fn test_pg_batch_delete() {
+        // Skip when no PostgreSQL endpoint is configured.
+        let Some(kv_backend) = build_pg_kv_backend("batch_delete_test").await else {
+            return;
+        };
+        let prefix = b"batch_delete/";
+        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
+        // NOTE(review): this runs the delete-range suite, same as `test_pg_delete_range`;
+        // confirm whether the batch-delete suite was intended here.
+        test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
+        unprepare_kv(&kv_backend, prefix).await;
+    }
+
+    #[tokio::test]
+    async fn test_pg_batch_delete_with_prefix() {
+        // Skip when no PostgreSQL endpoint is configured.
+        let Some(kv_backend) = build_pg_kv_backend("batch_delete_with_prefix_test").await else {
+            return;
+        };
+        let prefix = b"batch_delete/";
+        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
+        test_kv_batch_delete_with_prefix(&kv_backend, prefix.to_vec()).await;
+        unprepare_kv(&kv_backend, prefix).await;
+    }
+
+    #[tokio::test]
+    async fn test_pg_delete_range() {
+        // Skip when no PostgreSQL endpoint is configured.
+        let Some(kv_backend) = build_pg_kv_backend("delete_range_test").await else {
+            return;
+        };
+        let prefix = b"delete_range/";
+        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
+        test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
+        unprepare_kv(&kv_backend, prefix).await;
+    }
+
+    #[tokio::test]
+    async fn test_pg_compare_and_put() {
+        // Skip when no PostgreSQL endpoint is configured.
+        let Some(kv_backend) = build_pg_kv_backend("compare_and_put_test").await else {
+            return;
+        };
+        let prefix = b"compare_and_put/";
+        let kv_backend = Arc::new(kv_backend);
+        test_kv_compare_and_put_with_prefix(kv_backend.clone(), prefix.to_vec()).await;
+    }
+
+    #[tokio::test]
+    async fn test_pg_txn() {
+        // Skip when no PostgreSQL endpoint is configured.
+        let Some(kv_backend) = build_pg_kv_backend("txn_test").await else {
+            return;
+        };
+        test_txn_one_compare_op(&kv_backend).await;
+        text_txn_multi_compare_op(&kv_backend).await;
+        test_txn_compare_equal(&kv_backend).await;
+        test_txn_compare_greater(&kv_backend).await;
+        test_txn_compare_less(&kv_backend).await;
+        test_txn_compare_not_equal(&kv_backend).await;
+    }
+}
--- a/src/common/meta/src/rpc/ddl.rs
+++ b/src/common/meta/src/rpc/ddl.rs
@@ -1239,6 +1239,7 @@ impl From<QueryContext> for PbQueryContext {
            timezone,
            extensions,
            channel: channel as u32,
+            snapshot_seqs: None,
        }
    }
 }
--- a/src/common/time/Cargo.toml
+++ b/src/common/time/Cargo.toml
@@ -10,7 +10,7 @@ workspace = true
 [dependencies]
 arrow.workspace = true
 chrono.workspace = true
-chrono-tz = "0.8"
+chrono-tz.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
 humantime.workspace = true
--- a/src/datanode/Cargo.toml
+++ b/src/datanode/Cargo.toml
@@ -39,7 +39,7 @@ datafusion-common.workspace = true
 datafusion-expr.workspace = true
 datatypes.workspace = true
 file-engine.workspace = true
-futures = "0.3"
+futures.workspace = true
 futures-util.workspace = true
 humantime-serde.workspace = true
 lazy_static.workspace = true
@@ -47,6 +47,7 @@ log-store.workspace = true
 meta-client.workspace = true
 metric-engine.workspace = true
 mito2.workspace = true
+num_cpus.workspace = true
 object-store.workspace = true
 prometheus.workspace = true
 prost.workspace = true
--- a/src/datanode/src/error.rs
+++ b/src/datanode/src/error.rs
@@ -260,6 +260,14 @@ pub enum Error {
        source: BoxedError,
    },

+    #[snafu(display("Failed to handle batch ddl request, ddl_type: {}", ddl_type))]
+    HandleBatchDdlRequest {
+        #[snafu(implicit)]
+        location: Location,
+        source: BoxedError,
+        ddl_type: String,
+    },
+
    #[snafu(display("RegionId {} not found", region_id))]
    RegionNotFound {
        region_id: RegionId,
@@ -438,7 +446,8 @@ impl ErrorExt for Error {
            UnsupportedOutput { .. } => StatusCode::Unsupported,
            HandleRegionRequest { source, .. }
            | GetRegionMetadata { source, .. }
-            | HandleBatchOpenRequest { source, .. } => source.status_code(),
+            | HandleBatchOpenRequest { source, .. }
+            | HandleBatchDdlRequest { source, .. } => source.status_code(),
            StopRegionEngine { source, .. } => source.status_code(),

            FindLogicalRegions { source, .. } => source.status_code(),
--- a/src/datanode/src/heartbeat.rs
+++ b/src/datanode/src/heartbeat.rs
@@ -224,6 +224,20 @@ impl HeartbeatTask {
        common_runtime::spawn_hb(async move {
            let sleep = tokio::time::sleep(Duration::from_millis(0));
            tokio::pin!(sleep);
+
+            let build_info = common_version::build_info();
+            let heartbeat_request = HeartbeatRequest {
+                peer: self_peer,
+                node_epoch,
+                info: Some(NodeInfo {
+                    version: build_info.version.to_string(),
+                    git_commit: build_info.commit_short.to_string(),
+                    start_time_ms: node_epoch,
+                    cpus: num_cpus::get() as u32,
+                }),
+                ..Default::default()
+            };
+
            loop {
                if !running.load(Ordering::Relaxed) {
                    info!("shutdown heartbeat task");
@@ -235,9 +249,8 @@ impl HeartbeatTask {
                            match outgoing_message_to_mailbox_message(message) {
                                Ok(message) => {
                                    let req = HeartbeatRequest {
-                                        peer: self_peer.clone(),
                                        mailbox_message: Some(message),
-                                        ..Default::default()
+                                        ..heartbeat_request.clone()
                                    };
                                    HEARTBEAT_RECV_COUNT.with_label_values(&["success"]).inc();
                                    Some(req)
@@ -253,22 +266,13 @@ impl HeartbeatTask {
                        }
                    }
                    _ = &mut sleep => {
-                        let build_info = common_version::build_info();
                        let region_stats = Self::load_region_stats(&region_server_clone);
                        let now = Instant::now();
                        let duration_since_epoch = (now - epoch).as_millis() as u64;
                        let req = HeartbeatRequest {
-                            peer: self_peer.clone(),
                            region_stats,
                            duration_since_epoch,
-                            node_epoch,
-                            info: Some(NodeInfo {
-                                version: build_info.version.to_string(),
-                                git_commit: build_info.commit_short.to_string(),
-                                // The start timestamp is the same as node_epoch currently.
-                                start_time_ms: node_epoch,
-                            }),
-                            ..Default::default()
+                            ..heartbeat_request.clone()
                        };
                        sleep.as_mut().reset(now + Duration::from_millis(interval));
                        Some(req)
--- a/src/datanode/src/region_server.rs
+++ b/src/datanode/src/region_server.rs
@@ -59,7 +59,7 @@ use store_api::region_engine::{
    SettableRegionRoleState,
 };
 use store_api::region_request::{
-    AffectedRows, RegionCloseRequest, RegionOpenRequest, RegionRequest,
+    AffectedRows, BatchRegionDdlRequest, RegionCloseRequest, RegionOpenRequest, RegionRequest,
 };
 use store_api::storage::RegionId;
 use tokio::sync::{Semaphore, SemaphorePermit};
@@ -69,9 +69,10 @@ use tonic::{Request, Response, Result as TonicResult};
 use crate::error::{
    self, BuildRegionRequestsSnafu, ConcurrentQueryLimiterClosedSnafu,
    ConcurrentQueryLimiterTimeoutSnafu, DataFusionSnafu, DecodeLogicalPlanSnafu,
-    ExecuteLogicalPlanSnafu, FindLogicalRegionsSnafu, HandleBatchOpenRequestSnafu,
-    HandleRegionRequestSnafu, NewPlanDecoderSnafu, RegionEngineNotFoundSnafu, RegionNotFoundSnafu,
-    RegionNotReadySnafu, Result, StopRegionEngineSnafu, UnexpectedSnafu, UnsupportedOutputSnafu,
+    ExecuteLogicalPlanSnafu, FindLogicalRegionsSnafu, HandleBatchDdlRequestSnafu,
+    HandleBatchOpenRequestSnafu, HandleRegionRequestSnafu, NewPlanDecoderSnafu,
+    RegionEngineNotFoundSnafu, RegionNotFoundSnafu, RegionNotReadySnafu, Result,
+    StopRegionEngineSnafu, UnexpectedSnafu, UnsupportedOutputSnafu,
 };
 use crate::event_listener::RegionServerEventListenerRef;

@@ -158,7 +159,12 @@ impl RegionServer {
        self.inner.handle_request(region_id, request).await
    }

-    async fn table_provider(&self, region_id: RegionId) -> Result<Arc<dyn TableProvider>> {
+    /// Returns a table provider for the region. Will set snapshot sequence if available in the context.
+    async fn table_provider(
+        &self,
+        region_id: RegionId,
+        ctx: Option<&session::context::QueryContext>,
+    ) -> Result<Arc<dyn TableProvider>> {
        let status = self
            .inner
            .region_map
@@ -172,7 +178,7 @@ impl RegionServer {

        self.inner
            .table_provider_factory
-            .create(region_id, status.into_engine())
+            .create(region_id, status.into_engine(), ctx)
            .await
            .context(ExecuteLogicalPlanSnafu)
    }
@@ -187,9 +193,6 @@ impl RegionServer {
        } else {
            None
        };
-        let region_id = RegionId::from_u64(request.region_id);
-        let provider = self.table_provider(region_id).await?;
-        let catalog_list = Arc::new(DummyCatalogList::with_table_provider(provider));

        let query_ctx: QueryContextRef = request
            .header
@@ -197,6 +200,10 @@ impl RegionServer {
            .map(|h| Arc::new(h.into()))
            .unwrap_or_else(|| Arc::new(QueryContextBuilder::default().build()));

+        let region_id = RegionId::from_u64(request.region_id);
+        let provider = self.table_provider(region_id, Some(&query_ctx)).await?;
+        let catalog_list = Arc::new(DummyCatalogList::with_table_provider(provider));
+
        let decoder = self
            .inner
            .query_engine
@@ -225,7 +232,10 @@ impl RegionServer {
        } else {
            None
        };
-        let provider = self.table_provider(request.region_id).await?;
+
+        let ctx: Option<session::context::QueryContext> = request.header.as_ref().map(|h| h.into());
+
+        let provider = self.table_provider(request.region_id, ctx.as_ref()).await?;

        struct RegionDataSourceInjector {
            source: Arc<dyn TableSource>,
@@ -344,62 +354,47 @@ impl RegionServer {
            .region_map
            .insert(region_id, RegionEngineWithStatus::Ready(engine));
    }
-}
-
-#[async_trait]
-impl RegionServerHandler for RegionServer {
-    async fn handle(&self, request: region_request::Body) -> ServerResult<RegionResponseV1> {
-        let is_parallel = matches!(
-            request,
-            region_request::Body::Inserts(_) | region_request::Body::Deletes(_)
-        );
-        let requests = RegionRequest::try_from_request_body(request)
-            .context(BuildRegionRequestsSnafu)
-            .map_err(BoxedError::new)
-            .context(ExecuteGrpcRequestSnafu)?;

+    async fn handle_batch_ddl_requests(
+        &self,
+        request: region_request::Body,
+    ) -> Result<RegionResponse> {
+        // The `unwrap` below relies on `RegionServer::handle()` routing only DDL bodies here.
+        let batch_request = BatchRegionDdlRequest::try_from_request_body(request)
+            .context(BuildRegionRequestsSnafu)?
+            .unwrap();
        let tracing_context = TracingContext::from_current_span();

-        let results = if is_parallel {
-            let join_tasks = requests.into_iter().map(|(region_id, req)| {
-                let self_to_move = self.clone();
-                let span = tracing_context.attach(info_span!(
-                    "RegionServer::handle_region_request",
-                    region_id = region_id.to_string()
-                ));
-                async move {
-                    self_to_move
-                        .handle_request(region_id, req)
-                        .trace(span)
-                        .await
-                }
-            });
+        let span = tracing_context.attach(info_span!("RegionServer::handle_batch_ddl_requests"));
+        self.inner
+            .handle_batch_request(batch_request)
+            .trace(span)
+            .await
+    }

-            try_join_all(join_tasks)
-                .await
-                .map_err(BoxedError::new)
-                .context(ExecuteGrpcRequestSnafu)?
-        } else {
-            let mut results = Vec::with_capacity(requests.len());
-            // FIXME(jeremy, ruihang): Once the engine supports merged calls, we should immediately
-            // modify this part to avoid inefficient serial loop calls.
-            for (region_id, req) in requests {
-                let span = tracing_context.attach(info_span!(
-                    "RegionServer::handle_region_request",
-                    region_id = region_id.to_string()
-                ));
-                let result = self
+    async fn handle_requests_in_parallel(
+        &self,
+        request: region_request::Body,
+    ) -> Result<RegionResponse> {
+        let requests =
+            RegionRequest::try_from_request_body(request).context(BuildRegionRequestsSnafu)?;
+        let tracing_context = TracingContext::from_current_span();
+
+        let join_tasks = requests.into_iter().map(|(region_id, req)| {
+            let self_to_move = self;
+            let span = tracing_context.attach(info_span!(
+                "RegionServer::handle_region_request",
+                region_id = region_id.to_string()
+            ));
+            async move {
+                self_to_move
                    .handle_request(region_id, req)
                    .trace(span)
                    .await
-                    .map_err(BoxedError::new)
-                    .context(ExecuteGrpcRequestSnafu)?;
-                results.push(result);
            }
-            results
-        };
+        });

-        // merge results by sum up affected rows and merge extensions.
+        let results = try_join_all(join_tasks).await?;
        let mut affected_rows = 0;
        let mut extensions = HashMap::new();
        for result in results {
@@ -407,6 +402,57 @@ impl RegionServerHandler for RegionServer {
            extensions.extend(result.extensions);
        }

+        Ok(RegionResponse {
+            affected_rows,
+            extensions,
+        })
+    }
+
+    /// Handles each region request of the body sequentially and merges the responses.
+    async fn handle_requests_in_serial(
+        &self,
+        request: region_request::Body,
+    ) -> Result<RegionResponse> {
+        let region_requests =
+            RegionRequest::try_from_request_body(request).context(BuildRegionRequestsSnafu)?;
+        let tracing_context = TracingContext::from_current_span();
+
+        let mut merged = RegionResponse {
+            affected_rows: 0,
+            extensions: HashMap::new(),
+        };
+        // FIXME(jeremy, ruihang): Once the engine supports merged calls, we should immediately
+        // modify this part to avoid inefficient serial loop calls.
+        for (region_id, req) in region_requests {
+            let request_span = tracing_context.attach(info_span!(
+                "RegionServer::handle_region_request",
+                region_id = region_id.to_string()
+            ));
+            let response = self.handle_request(region_id, req).trace(request_span).await?;
+
+            merged.affected_rows += response.affected_rows;
+            merged.extensions.extend(response.extensions);
+        }
+
+        Ok(merged)
+    }
+}
+
+#[async_trait]
+impl RegionServerHandler for RegionServer {
+    async fn handle(&self, request: region_request::Body) -> ServerResult<RegionResponseV1> {
+        let response = match &request {
+            region_request::Body::Creates(_)
+            | region_request::Body::Drops(_)
+            | region_request::Body::Alters(_) => self.handle_batch_ddl_requests(request).await,
+            region_request::Body::Inserts(_) | region_request::Body::Deletes(_) => {
+                self.handle_requests_in_parallel(request).await
+            }
+            _ => self.handle_requests_in_serial(request).await,
+        }
+        .map_err(BoxedError::new)
+        .context(ExecuteGrpcRequestSnafu)?;
+
        Ok(RegionResponseV1 {
            header: Some(ResponseHeader {
                status: Some(Status {
@@ -414,8 +460,8 @@ impl RegionServerHandler for RegionServer {
                    ..Default::default()
                }),
            }),
-            affected_rows: affected_rows as _,
-            extensions,
+            affected_rows: response.affected_rows as _,
+            extensions: response.extensions,
        })
    }
 }
@@ -661,7 +707,7 @@ impl RegionServerInner {
                            }
                        }
                        Err(e) => {
-                            self.unset_region_status(region_id, *region_change);
+                            self.unset_region_status(region_id, &engine, *region_change);
                            error!(e; "Failed to open region: {}", region_id);
                            errors.push(e);
                        }
@@ -670,7 +716,7 @@ impl RegionServerInner {
            }
            Err(e) => {
                for (&region_id, region_change) in &region_changes {
-                    self.unset_region_status(region_id, *region_change);
+                    self.unset_region_status(region_id, &engine, *region_change);
                }
                error!(e; "Failed to open batch regions");
                errors.push(BoxedError::new(e));
@@ -727,6 +773,71 @@ impl RegionServerInner {
            .collect::<Vec<_>>())
    }

+    /// Handles a batch of DDL requests with one engine call; an empty batch is a no-op.
+    /// Limitation: all requests in the batch must belong to the same engine.
+    pub async fn handle_batch_request(
+        &self,
+        batch_request: BatchRegionDdlRequest,
+    ) -> Result<RegionResponse> {
+        let region_changes = match &batch_request {
+            BatchRegionDdlRequest::Create(requests) => requests
+                .iter()
+                .map(|(region_id, create)| {
+                    let attribute = parse_region_attribute(&create.engine, &create.options)?;
+                    Ok((*region_id, RegionChange::Register(attribute)))
+                })
+                .collect::<Result<Vec<_>>>()?,
+            BatchRegionDdlRequest::Drop(requests) => requests
+                .iter()
+                .map(|(region_id, _)| (*region_id, RegionChange::Deregisters))
+                .collect::<Vec<_>>(),
+            BatchRegionDdlRequest::Alter(requests) => requests
+                .iter()
+                .map(|(region_id, _)| (*region_id, RegionChange::None))
+                .collect::<Vec<_>>(),
+        };
+
+        // The ddl procedure ensures all requests share one engine; resolve it from the first.
+        let Some((first_region_id, first_region_change)) = region_changes.first() else {
+            return Ok(RegionResponse::new(0));
+        };
+        let engine = match self.get_engine(*first_region_id, first_region_change)? {
+            CurrentEngine::Engine(engine) => engine,
+            CurrentEngine::EarlyReturn(rows) => return Ok(RegionResponse::new(rows)),
+        };
+        // Update the region statuses (see `set_region_status_not_ready`) before calling the engine.
+        for (region_id, region_change) in region_changes.iter() {
+            self.set_region_status_not_ready(*region_id, &engine, region_change);
+        }
+
+        let ddl_type = batch_request.request_type();
+        let result = engine
+            .handle_batch_ddl_requests(batch_request)
+            .await
+            .context(HandleBatchDdlRequestSnafu { ddl_type });
+        // On success every region is set ready; on failure `unset_region_status` is applied to each.
+        match result {
+            Ok(result) => {
+                for (region_id, region_change) in region_changes {
+                    self.set_region_status_ready(region_id, engine.clone(), region_change)
+                        .await?;
+                }
+
+                Ok(RegionResponse {
+                    affected_rows: result.affected_rows,
+                    extensions: result.extensions,
+                })
+            }
+            Err(err) => {
+                for (region_id, region_change) in region_changes {
+                    self.unset_region_status(region_id, &engine, region_change);
+                }
+
+                Err(err)
+            }
+        }
+    }
+
    pub async fn handle_request(
        &self,
        region_id: RegionId,
@@ -780,7 +891,7 @@ impl RegionServerInner {
            }
            Err(err) => {
                // Removes the region status if the operation fails.
-                self.unset_region_status(region_id, region_change);
+                self.unset_region_status(region_id, &engine, region_change);
                Err(err)
            }
        }
@@ -809,12 +920,21 @@ impl RegionServerInner {
        }
    }

-    fn unset_region_status(&self, region_id: RegionId, region_change: RegionChange) {
+    fn unset_region_status(
+        &self,
+        region_id: RegionId,
+        engine: &RegionEngineRef,
+        region_change: RegionChange,
+    ) {
        match region_change {
            RegionChange::None => {}
-            RegionChange::Register(_) | RegionChange::Deregisters => {
+            RegionChange::Register(_) => {
                self.region_map.remove(&region_id);
            }
+            RegionChange::Deregisters => {
+                self.region_map
+                    .insert(region_id, RegionEngineWithStatus::Ready(engine.clone()));
+            }
            RegionChange::Catchup => {}
        }
    }
@@ -1195,7 +1315,7 @@ mod tests {
            .unwrap_err();

        let status = mock_region_server.inner.region_map.get(&region_id);
-        assert!(status.is_none());
+        assert!(status.is_some());
    }

    struct CurrentEngineTest {
--- a/src/datanode/src/tests.rs
+++ b/src/datanode/src/tests.rs
@@ -37,7 +37,7 @@ use store_api::region_engine::{
    SettableRegionRoleState,
 };
 use store_api::region_request::{AffectedRows, RegionRequest};
-use store_api::storage::{RegionId, ScanRequest};
+use store_api::storage::{RegionId, ScanRequest, SequenceNumber};
 use table::TableRef;
 use tokio::sync::mpsc::{Receiver, Sender};

@@ -218,6 +218,10 @@ impl RegionEngine for MockRegionEngine {
        unimplemented!()
    }

+    async fn get_last_seq_num(&self, _: RegionId) -> Result<Option<SequenceNumber>, BoxedError> {
+        unimplemented!()
+    }
+
    async fn stop(&self) -> Result<(), BoxedError> {
        Ok(())
    }
--- a/src/datatypes/Cargo.toml
+++ b/src/datatypes/Cargo.toml
@@ -29,7 +29,7 @@ jsonb.workspace = true
 num = "0.4"
 num-traits = "0.2"
 ordered-float = { version = "3.0", features = ["serde"] }
-paste = "1.0"
+paste.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 snafu.workspace = true
--- a/src/datatypes/src/schema/column_schema.rs
+++ b/src/datatypes/src/schema/column_schema.rs
@@ -183,12 +183,6 @@ impl ColumnSchema {
        self
    }

-    // Put a placeholder to invalidate schemas.all(!has_inverted_index_key).
-    pub fn insert_inverted_index_placeholder(&mut self) {
-        self.metadata
-            .insert(INVERTED_INDEX_KEY.to_string(), "".to_string());
-    }
-
    pub fn is_inverted_indexed(&self) -> bool {
        self.metadata
            .get(INVERTED_INDEX_KEY)
@@ -386,6 +380,11 @@ impl ColumnSchema {
        );
        Ok(())
    }
+
+    pub fn unset_skipping_options(&mut self) -> Result<()> {
+        self.metadata.remove(SKIPPING_INDEX_KEY);
+        Ok(())
+    }
 }

 /// Column extended type set in column schema's metadata.
--- a/src/file-engine/Cargo.toml
+++ b/src/file-engine/Cargo.toml
@@ -13,7 +13,7 @@ workspace = true

 [dependencies]
 api.workspace = true
-async-trait = "0.1"
+async-trait.workspace = true
 common-catalog.workspace = true
 common-datasource.workspace = true
 common-error.workspace = true
--- a/src/file-engine/src/engine.rs
+++ b/src/file-engine/src/engine.rs
@@ -33,7 +33,7 @@ use store_api::region_request::{
    AffectedRows, RegionCloseRequest, RegionCreateRequest, RegionDropRequest, RegionOpenRequest,
    RegionRequest,
 };
-use store_api::storage::{RegionId, ScanRequest};
+use store_api::storage::{RegionId, ScanRequest, SequenceNumber};
 use tokio::sync::Mutex;

 use crate::config::EngineConfig;
@@ -114,6 +114,10 @@ impl RegionEngine for FileRegionEngine {
        None
    }

+    async fn get_last_seq_num(&self, _: RegionId) -> Result<Option<SequenceNumber>, BoxedError> {
+        Ok(None)
+    }
+
    fn set_region_role(&self, region_id: RegionId, role: RegionRole) -> Result<(), BoxedError> {
        self.inner
            .set_region_role(region_id, role)
--- a/src/flow/Cargo.toml
+++ b/src/flow/Cargo.toml
@@ -41,7 +41,7 @@ datafusion-substrait.workspace = true
 datatypes.workspace = true
 enum-as-inner = "0.6.0"
 enum_dispatch = "0.3"
-futures = "0.3"
+futures.workspace = true
 get-size2 = "0.1.2"
 greptime-proto.workspace = true
 # This fork of hydroflow is simply for keeping our dependency in our org, and pin the version
@@ -53,6 +53,7 @@ lazy_static.workspace = true
 meta-client.workspace = true
 nom = "7.1.3"
 num-traits = "0.2"
+num_cpus.workspace = true
 operator.workspace = true
 partition.workspace = true
 prometheus.workspace = true
--- a/src/flow/src/adapter/util.rs
+++ b/src/flow/src/adapter/util.rs
@@ -22,7 +22,7 @@ use common_meta::key::table_info::TableInfoValue;
 use datatypes::prelude::ConcreteDataType;
 use datatypes::schema::ColumnSchema;
 use itertools::Itertools;
-use operator::expr_factory::CreateExprFactory;
+use operator::expr_helper;
 use session::context::QueryContextBuilder;
 use snafu::{OptionExt, ResultExt};
 use table::table_reference::TableReference;
@@ -32,7 +32,6 @@ use crate::adapter::{TableName, WorkerHandle, AUTO_CREATED_PLACEHOLDER_TS_COL};
 use crate::error::{Error, ExternalSnafu, UnexpectedSnafu};
 use crate::repr::{ColumnType, RelationDesc, RelationType};
 use crate::FlowWorkerManager;
-
 impl FlowWorkerManager {
    /// Get a worker handle for creating flow, using round robin to select a worker
    pub(crate) async fn get_worker_handle_for_create_flow(&self) -> &WorkerHandle {
@@ -66,19 +65,18 @@ impl FlowWorkerManager {
        let proto_schema = column_schemas_to_proto(tys.clone(), &pks)?;

        // create sink table
-        let create_expr = CreateExprFactory {}
-            .create_table_expr_by_column_schemas(
-                &TableReference {
-                    catalog: &table_name[0],
-                    schema: &table_name[1],
-                    table: &table_name[2],
-                },
-                &proto_schema,
-                "mito",
-                Some(&format!("Sink table for flow {}", flow_name)),
-            )
-            .map_err(BoxedError::new)
-            .context(ExternalSnafu)?;
+        let create_expr = expr_helper::create_table_expr_by_column_schemas(
+            &TableReference {
+                catalog: &table_name[0],
+                schema: &table_name[1],
+                table: &table_name[2],
+            },
+            &proto_schema,
+            "mito",
+            Some(&format!("Sink table for flow {}", flow_name)),
+        )
+        .map_err(BoxedError::new)
+        .context(ExternalSnafu)?;

        self.submit_create_sink_table_ddl(create_expr).await?;
        Ok(true)
--- a/src/flow/src/heartbeat.rs
+++ b/src/flow/src/heartbeat.rs
@@ -60,12 +60,12 @@ async fn query_flow_state(
 #[derive(Clone)]
 pub struct HeartbeatTask {
    node_id: u64,
+    node_epoch: u64,
    peer_addr: String,
    meta_client: Arc<MetaClient>,
    report_interval: Duration,
    retry_interval: Duration,
    resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
-    start_time_ms: u64,
    running: Arc<AtomicBool>,
    query_stat_size: Option<SizeReportSender>,
 }
@@ -83,12 +83,12 @@ impl HeartbeatTask {
    ) -> Self {
        Self {
            node_id: opts.node_id.unwrap_or(0),
+            node_epoch: common_time::util::current_time_millis() as u64,
            peer_addr: addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
            meta_client,
            report_interval: heartbeat_opts.interval,
            retry_interval: heartbeat_opts.retry_interval,
            resp_handler_executor,
-            start_time_ms: common_time::util::current_time_millis() as u64,
            running: Arc::new(AtomicBool::new(false)),
            query_stat_size: None,
        }
@@ -134,10 +134,9 @@ impl HeartbeatTask {
        }
    }

-    fn create_heartbeat_request(
+    fn new_heartbeat_request(
+        heartbeat_request: &HeartbeatRequest,
        message: Option<OutgoingMessage>,
-        peer: Option<Peer>,
-        start_time_ms: u64,
        latest_report: &Option<FlowStat>,
    ) -> Option<HeartbeatRequest> {
        let mailbox_message = match message.map(outgoing_message_to_mailbox_message) {
@@ -161,10 +160,8 @@ impl HeartbeatTask {

        Some(HeartbeatRequest {
            mailbox_message,
-            peer,
-            info: Self::build_node_info(start_time_ms),
            flow_stat,
-            ..Default::default()
+            ..heartbeat_request.clone()
        })
    }

@@ -174,6 +171,7 @@ impl HeartbeatTask {
            version: build_info.version.to_string(),
            git_commit: build_info.commit_short.to_string(),
            start_time_ms,
+            cpus: num_cpus::get() as u32,
        })
    }

@@ -183,7 +181,7 @@ impl HeartbeatTask {
        mut outgoing_rx: mpsc::Receiver<OutgoingMessage>,
    ) {
        let report_interval = self.report_interval;
-        let start_time_ms = self.start_time_ms;
+        let node_epoch = self.node_epoch;
        let self_peer = Some(Peer {
            id: self.node_id,
            addr: self.peer_addr.clone(),
@@ -198,18 +196,25 @@ impl HeartbeatTask {
            interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
            let mut latest_report = None;

+            let heartbeat_request = HeartbeatRequest {
+                peer: self_peer,
+                node_epoch,
+                info: Self::build_node_info(node_epoch),
+                ..Default::default()
+            };
+
            loop {
                let req = tokio::select! {
                    message = outgoing_rx.recv() => {
                        if let Some(message) = message {
-                            Self::create_heartbeat_request(Some(message), self_peer.clone(), start_time_ms, &latest_report)
+                            Self::new_heartbeat_request(&heartbeat_request, Some(message), &latest_report)
                        } else {
                            // Receives None that means Sender was dropped, we need to break the current loop
                            break
                        }
                    }
                    _ = interval.tick() => {
-                        Self::create_heartbeat_request(None, self_peer.clone(), start_time_ms, &latest_report)
+                        Self::new_heartbeat_request(&heartbeat_request, None, &latest_report)
                    }
                };

--- a/src/frontend/Cargo.toml
+++ b/src/frontend/Cargo.toml
@@ -13,7 +13,7 @@ workspace = true
 [dependencies]
 api.workspace = true
 arc-swap = "1.0"
-async-trait = "0.1"
+async-trait.workspace = true
 auth.workspace = true
 cache.workspace = true
 catalog.workspace = true
@@ -35,6 +35,7 @@ common-runtime.workspace = true
 common-telemetry.workspace = true
 common-time.workspace = true
 common-version.workspace = true
+datafusion.workspace = true
 datafusion-expr.workspace = true
 datanode.workspace = true
 datatypes.workspace = true
@@ -43,6 +44,7 @@ lazy_static.workspace = true
 log-query.workspace = true
 log-store.workspace = true
 meta-client.workspace = true
+num_cpus.workspace = true
 opentelemetry-proto.workspace = true
 operator.workspace = true
 partition.workspace = true
@@ -52,6 +54,7 @@ promql-parser.workspace = true
 prost.workspace = true
 query.workspace = true
 serde.workspace = true
+serde_json.workspace = true
 servers.workspace = true
 session.workspace = true
 snafu.workspace = true
@@ -68,7 +71,7 @@ catalog = { workspace = true, features = ["testing"] }
 common-test-util.workspace = true
 datanode.workspace = true
 datatypes.workspace = true
-futures = "0.3"
+futures.workspace = true
 serde_json.workspace = true
 strfmt = "0.2"
 tower.workspace = true
--- a/src/frontend/src/frontend.rs
+++ b/src/frontend/src/frontend.rs
@@ -24,7 +24,8 @@ use servers::heartbeat_options::HeartbeatOptions;
 use servers::http::HttpOptions;

 use crate::service_config::{
-    InfluxdbOptions, MysqlOptions, OpentsdbOptions, OtlpOptions, PostgresOptions, PromStoreOptions,
+    InfluxdbOptions, JaegerOptions, MysqlOptions, OpentsdbOptions, OtlpOptions, PostgresOptions,
+    PromStoreOptions,
 };

 #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
@@ -40,6 +41,7 @@ pub struct FrontendOptions {
    pub opentsdb: OpentsdbOptions,
    pub influxdb: InfluxdbOptions,
    pub prom_store: PromStoreOptions,
+    pub jaeger: JaegerOptions,
    pub otlp: OtlpOptions,
    pub meta_client: Option<MetaClientOptions>,
    pub logging: LoggingOptions,
@@ -62,6 +64,7 @@ impl Default for FrontendOptions {
            postgres: PostgresOptions::default(),
            opentsdb: OpentsdbOptions::default(),
            influxdb: InfluxdbOptions::default(),
+            jaeger: JaegerOptions::default(),
            prom_store: PromStoreOptions::default(),
            otlp: OtlpOptions::default(),
            meta_client: None,
--- a/src/frontend/src/heartbeat.rs
+++ b/src/frontend/src/heartbeat.rs
@@ -118,10 +118,9 @@ impl HeartbeatTask {
        });
    }

-    fn create_heartbeat_request(
+    fn new_heartbeat_request(
+        heartbeat_request: &HeartbeatRequest,
        message: Option<OutgoingMessage>,
-        peer: Option<Peer>,
-        start_time_ms: u64,
    ) -> Option<HeartbeatRequest> {
        let mailbox_message = match message.map(outgoing_message_to_mailbox_message) {
            Some(Ok(message)) => Some(message),
@@ -134,9 +133,7 @@ impl HeartbeatTask {

        Some(HeartbeatRequest {
            mailbox_message,
-            peer,
-            info: Self::build_node_info(start_time_ms),
-            ..Default::default()
+            ..heartbeat_request.clone()
        })
    }

@@ -147,6 +144,7 @@ impl HeartbeatTask {
            version: build_info.version.to_string(),
            git_commit: build_info.commit_short.to_string(),
            start_time_ms,
+            cpus: num_cpus::get() as u32,
        })
    }

@@ -167,11 +165,17 @@ impl HeartbeatTask {
            let sleep = tokio::time::sleep(Duration::from_millis(0));
            tokio::pin!(sleep);

+            let heartbeat_request = HeartbeatRequest {
+                peer: self_peer,
+                info: Self::build_node_info(start_time_ms),
+                ..Default::default()
+            };
+
            loop {
                let req = tokio::select! {
                    message = outgoing_rx.recv() => {
                        if let Some(message) = message {
-                            Self::create_heartbeat_request(Some(message), self_peer.clone(), start_time_ms)
+                            Self::new_heartbeat_request(&heartbeat_request, Some(message))
                        } else {
                            // Receives None that means Sender was dropped, we need to break the current loop
                            break
@@ -179,7 +183,7 @@ impl HeartbeatTask {
                    }
                    _ = &mut sleep => {
                        sleep.as_mut().reset(Instant::now() + Duration::from_millis(report_interval));
-                       Self::create_heartbeat_request(None, self_peer.clone(), start_time_ms)
+                       Self::new_heartbeat_request(&heartbeat_request, None)
                    }
                };

--- a/src/frontend/src/instance.rs
+++ b/src/frontend/src/instance.rs
@@ -15,6 +15,7 @@
 pub mod builder;
 mod grpc;
 mod influxdb;
+mod jaeger;
 mod log_handler;
 mod logs;
 mod opentsdb;
@@ -65,7 +66,7 @@ use servers::prometheus_handler::PrometheusHandler;
 use servers::query_handler::grpc::GrpcQueryHandler;
 use servers::query_handler::sql::SqlQueryHandler;
 use servers::query_handler::{
-    InfluxdbLineProtocolHandler, LogQueryHandler, OpenTelemetryProtocolHandler,
+    InfluxdbLineProtocolHandler, JaegerQueryHandler, LogQueryHandler, OpenTelemetryProtocolHandler,
    OpentsdbProtocolHandler, PipelineHandler, PromStoreProtocolHandler,
 };
 use servers::server::ServerHandlers;
@@ -100,6 +101,7 @@ pub trait FrontendInstance:
    + PrometheusHandler
    + PipelineHandler
    + LogQueryHandler
+    + JaegerQueryHandler
    + Send
    + Sync
    + 'static
@@ -167,6 +169,10 @@ impl Instance {
        &self.catalog_manager
    }

+    pub fn query_engine(&self) -> &QueryEngineRef {
+        &self.query_engine
+    }
+
    pub fn plugins(&self) -> Plugins {
        self.plugins.clone()
    }
@@ -231,6 +237,13 @@ impl Instance {

        let output = match stmt {
            Statement::Query(_) | Statement::Explain(_) | Statement::Delete(_) => {
+                // TODO: remove this when format is supported in datafusion
+                if let Statement::Explain(explain) = &stmt {
+                    if let Some(format) = explain.format() {
+                        query_ctx.set_explain_format(format.to_string());
+                    }
+                }
+
                let stmt = QueryStatement::Sql(stmt);
                let plan = self
                    .statement_executor
--- a/src/frontend/src/instance/jaeger.rs
+++ b/src/frontend/src/instance/jaeger.rs
@@ -0,0 +1,337 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use catalog::CatalogManagerRef;
+use common_function::function::{Function, FunctionRef};
+use common_function::scalars::json::json_get::{
+    JsonGetBool, JsonGetFloat, JsonGetInt, JsonGetString,
+};
+use common_function::scalars::udf::create_udf;
+use common_function::state::FunctionState;
+use common_query::Output;
+use common_recordbatch::adapter::RecordBatchStreamAdapter;
+use datafusion::dataframe::DataFrame;
+use datafusion::execution::context::SessionContext;
+use datafusion::execution::SessionStateBuilder;
+use datafusion_expr::{col, lit, lit_timestamp_nano, Expr};
+use query::QueryEngineRef;
+use serde_json::Value as JsonValue;
+use servers::error::{
+    CatalogSnafu, CollectRecordbatchSnafu, DataFusionSnafu, Result as ServerResult,
+    TableNotFoundSnafu,
+};
+use servers::http::jaeger::QueryTraceParams;
+use servers::otlp::trace::{
+    DURATION_NANO_COLUMN, SERVICE_NAME_COLUMN, SPAN_ATTRIBUTES_COLUMN, SPAN_ID_COLUMN,
+    SPAN_KIND_COLUMN, SPAN_KIND_PREFIX, SPAN_NAME_COLUMN, TIMESTAMP_COLUMN, TRACE_ID_COLUMN,
+    TRACE_TABLE_NAME,
+};
+use servers::query_handler::JaegerQueryHandler;
+use session::context::QueryContextRef;
+use snafu::{OptionExt, ResultExt};
+use table::table::adapter::DfTableProviderAdapter;
+
+use super::Instance;
+
+const DEFAULT_LIMIT: usize = 100;
+
+#[async_trait]
+impl JaegerQueryHandler for Instance {
+    async fn get_services(&self, ctx: QueryContextRef) -> ServerResult<Output> {
+        // It's equivalent to `SELECT DISTINCT(service_name) FROM {db}.{trace_table} LIMIT {DEFAULT_LIMIT}`.
+        Ok(query_trace_table(
+            ctx,
+            self.catalog_manager(),
+            self.query_engine(),
+            vec![col(SERVICE_NAME_COLUMN)],
+            vec![],
+            Some(DEFAULT_LIMIT),
+            None,
+            true,
+        )
+        .await?)
+    }
+
+    async fn get_operations(
+        &self,
+        ctx: QueryContextRef,
+        service_name: &str,
+        span_kind: Option<&str>,
+    ) -> ServerResult<Output> {
+        let mut filters = vec![col(SERVICE_NAME_COLUMN).eq(lit(service_name))];
+
+        if let Some(span_kind) = span_kind {
+            filters.push(col(SPAN_KIND_COLUMN).eq(lit(format!(
+                "{}{}",
+                SPAN_KIND_PREFIX,
+                span_kind.to_uppercase()
+            ))));
+        }
+
+        // It's equivalent to `SELECT span_name, span_kind, service_name FROM {db}.{trace_table} WHERE service_name = '{service_name}' [AND span_kind = '{SPAN_KIND_PREFIX}{SPAN_KIND}'] LIMIT {DEFAULT_LIMIT}`.
+        Ok(query_trace_table(
+            ctx,
+            self.catalog_manager(),
+            self.query_engine(),
+            vec![
+                col(SPAN_NAME_COLUMN),
+                col(SPAN_KIND_COLUMN),
+                col(SERVICE_NAME_COLUMN),
+            ],
+            filters,
+            Some(DEFAULT_LIMIT),
+            None,
+            false,
+        )
+        .await?)
+    }
+
+    async fn get_trace(&self, ctx: QueryContextRef, trace_id: &str) -> ServerResult<Output> {
+        // It's equivalent to `SELECT trace_id, timestamp, duration_nano, service_name, span_name, span_id, span_attributes FROM {db}.{trace_table} WHERE trace_id = '{trace_id}' LIMIT {DEFAULT_LIMIT}`.
+        let selects = vec![
+            col(TRACE_ID_COLUMN),
+            col(TIMESTAMP_COLUMN),
+            col(DURATION_NANO_COLUMN),
+            col(SERVICE_NAME_COLUMN),
+            col(SPAN_NAME_COLUMN),
+            col(SPAN_ID_COLUMN),
+            col(SPAN_ATTRIBUTES_COLUMN),
+        ];
+
+        let filters = vec![col(TRACE_ID_COLUMN).eq(lit(trace_id))];
+
+        Ok(query_trace_table(
+            ctx,
+            self.catalog_manager(),
+            self.query_engine(),
+            selects,
+            filters,
+            Some(DEFAULT_LIMIT),
+            None,
+            false,
+        )
+        .await?)
+    }
+
+    async fn find_traces(
+        &self,
+        ctx: QueryContextRef,
+        query_params: QueryTraceParams,
+    ) -> ServerResult<Output> {
+        let selects = vec![
+            col(TRACE_ID_COLUMN),
+            col(TIMESTAMP_COLUMN),
+            col(DURATION_NANO_COLUMN),
+            col(SERVICE_NAME_COLUMN),
+            col(SPAN_NAME_COLUMN),
+            col(SPAN_ID_COLUMN),
+            col(SPAN_ATTRIBUTES_COLUMN),
+        ];
+
+        let mut filters = vec![];
+
+        if let Some(operation_name) = query_params.operation_name {
+            filters.push(col(SPAN_NAME_COLUMN).eq(lit(operation_name)));
+        }
+
+        if let Some(start_time) = query_params.start_time {
+            filters.push(col(TIMESTAMP_COLUMN).gt_eq(lit_timestamp_nano(start_time)));
+        }
+
+        if let Some(end_time) = query_params.end_time {
+            filters.push(col(TIMESTAMP_COLUMN).lt_eq(lit_timestamp_nano(end_time)));
+        }
+
+        if let Some(min_duration) = query_params.min_duration {
+            filters.push(col(DURATION_NANO_COLUMN).gt_eq(lit(min_duration)));
+        }
+
+        if let Some(max_duration) = query_params.max_duration {
+            filters.push(col(DURATION_NANO_COLUMN).lt_eq(lit(max_duration)));
+        }
+
+        Ok(query_trace_table(
+            ctx,
+            self.catalog_manager(),
+            self.query_engine(),
+            selects,
+            filters,
+            Some(DEFAULT_LIMIT),
+            query_params.tags,
+            false,
+        )
+        .await?)
+    }
+}
+
+#[allow(clippy::too_many_arguments)]
+async fn query_trace_table(
+    ctx: QueryContextRef,
+    catalog_manager: &CatalogManagerRef,
+    query_engine: &QueryEngineRef,
+    selects: Vec<Expr>,
+    filters: Vec<Expr>,
+    limit: Option<usize>,
+    tags: Option<HashMap<String, JsonValue>>,
+    distinct: bool,
+) -> ServerResult<Output> {
+    let db = ctx.get_db_string();
+    let table = catalog_manager
+        .table(ctx.current_catalog(), &db, TRACE_TABLE_NAME, Some(&ctx))
+        .await
+        .context(CatalogSnafu)?
+        .with_context(|| TableNotFoundSnafu {
+            table: TRACE_TABLE_NAME,
+            catalog: ctx.current_catalog(),
+            schema: db,
+        })?;
+
+    let df_context = create_df_context(query_engine, ctx.clone())?;
+
+    let dataframe = df_context
+        .read_table(Arc::new(DfTableProviderAdapter::new(table)))
+        .context(DataFusionSnafu)?;
+
+    let dataframe = dataframe.select(selects).context(DataFusionSnafu)?;
+
+    // Apply all filters.
+    let dataframe = filters
+        .into_iter()
+        .chain(tags.map_or(Ok(vec![]), |t| tags_filters(&dataframe, t))?)
+        .try_fold(dataframe, |df, expr| {
+            df.filter(expr).context(DataFusionSnafu)
+        })?;
+
+    // Apply the distinct if needed.
+    let dataframe = if distinct {
+        dataframe.distinct().context(DataFusionSnafu)?
+    } else {
+        dataframe
+    };
+
+    // Apply the limit if needed.
+    let dataframe = if let Some(limit) = limit {
+        dataframe.limit(0, Some(limit)).context(DataFusionSnafu)?
+    } else {
+        dataframe
+    };
+
+    // Execute the query and collect the result.
+    let stream = dataframe.execute_stream().await.context(DataFusionSnafu)?;
+
+    let output = Output::new_with_stream(Box::pin(
+        RecordBatchStreamAdapter::try_new(stream).context(CollectRecordbatchSnafu)?,
+    ));
+
+    Ok(output)
+}
+
+// The current implementation registers UDFs during the planning stage, which makes it difficult
+// to utilize them through DataFrame APIs. To address this limitation, we create a new session
+// context and register the required UDFs, allowing them to be decoupled from the global context.
+// TODO(zyy17): Is it possible or necessary to reuse the existing session context?
+fn create_df_context(
+    query_engine: &QueryEngineRef,
+    ctx: QueryContextRef,
+) -> ServerResult<SessionContext> {
+    let df_context = SessionContext::new_with_state(
+        SessionStateBuilder::new_from_existing(query_engine.engine_state().session_state()).build(),
+    );
+
+    // The following JSON UDFs will be used for tags filters.
+    let udfs: Vec<FunctionRef> = vec![
+        Arc::new(JsonGetInt),
+        Arc::new(JsonGetFloat),
+        Arc::new(JsonGetBool),
+        Arc::new(JsonGetString),
+    ];
+
+    for udf in udfs {
+        df_context
+            .register_udf(create_udf(udf, ctx.clone(), Arc::new(FunctionState::default())).into());
+    }
+
+    Ok(df_context)
+}
+
+fn tags_filters(
+    dataframe: &DataFrame,
+    tags: HashMap<String, JsonValue>,
+) -> ServerResult<Vec<Expr>> {
+    let mut filters = vec![];
+
+    // NOTE: The key of the tags may contain `.`, for example: `http.status_code`, so we need to use `["http.status_code"]` in json path to access the value.
+    for (key, value) in tags.iter() {
+        if let JsonValue::String(value) = value {
+            filters.push(
+                dataframe
+                    .registry()
+                    .udf(JsonGetString {}.name())
+                    .context(DataFusionSnafu)?
+                    .call(vec![
+                        col(SPAN_ATTRIBUTES_COLUMN),
+                        lit(format!("[\"{}\"]", key)),
+                    ])
+                    .eq(lit(value)),
+            );
+        }
+        if let JsonValue::Number(value) = value {
+            if value.is_i64() {
+                filters.push(
+                    dataframe
+                        .registry()
+                        .udf(JsonGetInt {}.name())
+                        .context(DataFusionSnafu)?
+                        .call(vec![
+                            col(SPAN_ATTRIBUTES_COLUMN),
+                            lit(format!("[\"{}\"]", key)),
+                        ])
+                        .eq(lit(value.as_i64().unwrap())),
+                );
+            }
+            if value.is_f64() {
+                filters.push(
+                    dataframe
+                        .registry()
+                        .udf(JsonGetFloat {}.name())
+                        .context(DataFusionSnafu)?
+                        .call(vec![
+                            col(SPAN_ATTRIBUTES_COLUMN),
+                            lit(format!("[\"{}\"]", key)),
+                        ])
+                        .eq(lit(value.as_f64().unwrap())),
+                );
+            }
+        }
+        if let JsonValue::Bool(value) = value {
+            filters.push(
+                dataframe
+                    .registry()
+                    .udf(JsonGetBool {}.name())
+                    .context(DataFusionSnafu)?
+                    .call(vec![
+                        col(SPAN_ATTRIBUTES_COLUMN),
+                        lit(format!("[\"{}\"]", key)),
+                    ])
+                    .eq(lit(*value)),
+            );
+        }
+    }
+
+    Ok(filters)
+}
--- a/src/frontend/src/server.rs
+++ b/src/frontend/src/server.rs
@@ -112,6 +112,11 @@ where
        if opts.otlp.enable {
            builder = builder.with_otlp_handler(self.instance.clone());
        }
+
+        if opts.jaeger.enable {
+            builder = builder.with_jaeger_handler(self.instance.clone());
+        }
+
        builder
    }

@@ -227,6 +232,7 @@ where
                Arc::new(MysqlSpawnConfig::new(
                    opts.tls.should_force_tls(),
                    tls_server_config,
+                    opts.keep_alive.as_secs(),
                    opts.reject_no_database.unwrap_or(false),
                )),
            );
@@ -248,6 +254,7 @@ where
                ServerSqlQueryHandlerAdapter::arc(instance.clone()),
                opts.tls.should_force_tls(),
                tls_server_config,
+                opts.keep_alive.as_secs(),
                common_runtime::global_runtime(),
                user_provider.clone(),
            )) as Box<dyn Server>;
--- a/src/frontend/src/service_config.rs
+++ b/src/frontend/src/service_config.rs
@@ -13,6 +13,7 @@
 // limitations under the License.

 pub mod influxdb;
+pub mod jaeger;
 pub mod mysql;
 pub mod opentsdb;
 pub mod otlp;
@@ -20,6 +21,7 @@ pub mod postgres;
 pub mod prom_store;

 pub use influxdb::InfluxdbOptions;
+pub use jaeger::JaegerOptions;
 pub use mysql::MysqlOptions;
 pub use opentsdb::OpentsdbOptions;
 pub use otlp::OtlpOptions;
--- a/src/frontend/src/service_config/jaeger.rs
+++ b/src/frontend/src/service_config/jaeger.rs
@@ -0,0 +1,39 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use serde::{Deserialize, Serialize};
+
+/// Options for Jaeger query APIs.
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
+pub struct JaegerOptions {
+    /// Whether to enable Jaeger query APIs.
+    pub enable: bool,
+}
+
+impl Default for JaegerOptions {
+    fn default() -> Self {
+        Self { enable: true }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::JaegerOptions;
+
+    #[test]
+    fn test_jaeger_options() {
+        let default = JaegerOptions::default();
+        assert!(default.enable);
+    }
+}
--- a/src/frontend/src/service_config/mysql.rs
+++ b/src/frontend/src/service_config/mysql.rs
@@ -23,6 +23,12 @@ pub struct MysqlOptions {
    #[serde(default = "Default::default")]
    pub tls: TlsOption,
    pub reject_no_database: Option<bool>,
+    /// Server-side keep-alive time.
+    ///
+    /// Set to 0 (default) to disable.
+    #[serde(default = "Default::default")]
+    #[serde(with = "humantime_serde")]
+    pub keep_alive: std::time::Duration,
 }

 impl Default for MysqlOptions {
@@ -33,6 +39,7 @@ impl Default for MysqlOptions {
            runtime_size: 2,
            tls: TlsOption::default(),
            reject_no_database: None,
+            keep_alive: std::time::Duration::from_secs(0),
        }
    }
 }
--- a/src/frontend/src/service_config/postgres.rs
+++ b/src/frontend/src/service_config/postgres.rs
@@ -22,6 +22,12 @@ pub struct PostgresOptions {
    pub runtime_size: usize,
    #[serde(default = "Default::default")]
    pub tls: TlsOption,
+    /// Server-side keep-alive time.
+    ///
+    /// Set to 0 (default) to disable.
+    #[serde(default = "Default::default")]
+    #[serde(with = "humantime_serde")]
+    pub keep_alive: std::time::Duration,
 }

 impl Default for PostgresOptions {
@@ -31,6 +37,7 @@ impl Default for PostgresOptions {
            addr: "127.0.0.1:4003".to_string(),
            runtime_size: 2,
            tls: Default::default(),
+            keep_alive: std::time::Duration::from_secs(0),
        }
    }
 }
--- a/src/index/src/fulltext_index/tests.rs
+++ b/src/index/src/fulltext_index/tests.rs
@@ -25,12 +25,12 @@ use crate::fulltext_index::create::{FulltextIndexCreator, TantivyFulltextIndexCr
 use crate::fulltext_index::search::{FulltextIndexSearcher, RowId, TantivyFulltextIndexSearcher};
 use crate::fulltext_index::{Analyzer, Config};

-async fn new_bounded_stager(prefix: &str) -> (TempDir, Arc<BoundedStager>) {
+async fn new_bounded_stager(prefix: &str) -> (TempDir, Arc<BoundedStager<String>>) {
    let staging_dir = create_temp_dir(prefix);
    let path = staging_dir.path().to_path_buf();
    (
        staging_dir,
-        Arc::new(BoundedStager::new(path, 102400).await.unwrap()),
+        Arc::new(BoundedStager::new(path, 102400, None, None).await.unwrap()),
    )
 }

@@ -68,13 +68,13 @@ async fn test_search(
    let file_accessor = Arc::new(MockFileAccessor::new(prefix));
    let puffin_manager = FsPuffinManager::new(stager, file_accessor);

-    let file_name = "fulltext_index";
-    let blob_key = "fulltext_index";
-    let mut writer = puffin_manager.writer(file_name).await.unwrap();
-    create_index(prefix, &mut writer, blob_key, texts, config).await;
+    let file_name = "fulltext_index".to_string();
+    let blob_key = "fulltext_index".to_string();
+    let mut writer = puffin_manager.writer(&file_name).await.unwrap();
+    create_index(prefix, &mut writer, &blob_key, texts, config).await;

-    let reader = puffin_manager.reader(file_name).await.unwrap();
-    let index_dir = reader.dir(blob_key).await.unwrap();
+    let reader = puffin_manager.reader(&file_name).await.unwrap();
+    let index_dir = reader.dir(&blob_key).await.unwrap();
    let searcher = TantivyFulltextIndexSearcher::new(index_dir.path()).unwrap();
    let results = searcher.search(query).await.unwrap();

--- a/src/log-query/src/log_query.rs
+++ b/src/log-query/src/log_query.rs
@@ -55,7 +55,7 @@ pub struct LogQuery {
 }

 /// Expression to calculate on log after filtering.
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
 pub enum LogExpr {
    NamedIdent(String),
    PositionalIdent(usize),
@@ -289,7 +289,7 @@ pub struct ColumnFilters {
    pub filters: Vec<ContentFilter>,
 }

-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, Serialize, Deserialize)]
 pub enum ContentFilter {
    // Search-based filters
    /// Only match the exact content.
@@ -310,14 +310,19 @@ pub enum ContentFilter {
    // Value-based filters
    /// Content exists, a.k.a. not null.
    Exist,
-    Between(String, String),
+    Between {
+        start: String,
+        end: String,
+        start_inclusive: bool,
+        end_inclusive: bool,
+    },
    // TODO(ruihang): arithmetic operations

    // Compound filters
    Compound(Vec<ContentFilter>, BinaryOperator),
 }

-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, Serialize, Deserialize)]
 pub enum BinaryOperator {
    And,
    Or,
--- a/src/meta-client/Cargo.toml
+++ b/src/meta-client/Cargo.toml
@@ -9,7 +9,7 @@ workspace = true

 [dependencies]
 api.workspace = true
-async-trait = "0.1"
+async-trait.workspace = true
 common-error.workspace = true
 common-grpc.workspace = true
 common-macro.workspace = true
@@ -27,7 +27,7 @@ tonic.workspace = true

 [dev-dependencies]
 datatypes.workspace = true
-futures = "0.3"
+futures.workspace = true
 meta-srv = { workspace = true, features = ["mock"] }
 tower.workspace = true
 tracing = "0.1"
--- a/src/meta-srv/Cargo.toml
+++ b/src/meta-srv/Cargo.toml
@@ -16,7 +16,7 @@ local-ip-address.workspace = true

 [dependencies]
 api.workspace = true
-async-trait = "0.1"
+async-trait.workspace = true
 bytes.workspace = true
 chrono.workspace = true
 clap.workspace = true
--- a/src/meta-srv/src/bootstrap.rs
+++ b/src/meta-srv/src/bootstrap.rs
@@ -24,7 +24,7 @@ use common_meta::kv_backend::chroot::ChrootKvBackend;
 use common_meta::kv_backend::etcd::EtcdStore;
 use common_meta::kv_backend::memory::MemoryKvBackend;
 #[cfg(feature = "pg_kvbackend")]
-use common_meta::kv_backend::postgres::PgStore;
+use common_meta::kv_backend::rds::PgStore;
 use common_meta::kv_backend::{KvBackendRef, ResettableKvBackendRef};
 #[cfg(feature = "pg_kvbackend")]
 use common_telemetry::error;
--- a/src/meta-srv/src/election/postgres.rs
+++ b/src/meta-srv/src/election/postgres.rs
@@ -787,12 +787,12 @@ mod tests {
            .unwrap();
        assert!(res);

-        let (value, _, _, prev) = pg_election
+        let (value_get, _, _, prev) = pg_election
            .get_value_with_lease(&key, true)
            .await
            .unwrap()
            .unwrap();
-        assert_eq!(value, value);
+        assert_eq!(value_get, value);

        let prev = prev.unwrap();
        pg_election
--- a/src/meta-srv/src/handler.rs
+++ b/src/meta-srv/src/handler.rs
@@ -44,6 +44,7 @@ use mailbox_handler::MailboxHandler;
 use on_leader_start_handler::OnLeaderStartHandler;
 use publish_heartbeat_handler::PublishHeartbeatHandler;
 use region_lease_handler::RegionLeaseHandler;
+use remap_flow_peer_handler::RemapFlowPeerHandler;
 use response_header_handler::ResponseHeaderHandler;
 use snafu::{OptionExt, ResultExt};
 use store_api::storage::RegionId;
@@ -71,6 +72,7 @@ pub mod mailbox_handler;
 pub mod on_leader_start_handler;
 pub mod publish_heartbeat_handler;
 pub mod region_lease_handler;
+pub mod remap_flow_peer_handler;
 pub mod response_header_handler;

 #[async_trait::async_trait]
@@ -573,6 +575,7 @@ impl HeartbeatHandlerGroupBuilder {
            self.add_handler_last(publish_heartbeat_handler);
        }
        self.add_handler_last(CollectStatsHandler::new(self.flush_stats_factor));
+        self.add_handler_last(RemapFlowPeerHandler::default());

        if let Some(flow_state_handler) = self.flow_state_handler.take() {
            self.add_handler_last(flow_state_handler);
@@ -853,7 +856,7 @@ mod tests {
            .unwrap();

        let handlers = group.handlers;
-        assert_eq!(12, handlers.len());
+        assert_eq!(13, handlers.len());

        let names = [
            "ResponseHeaderHandler",
@@ -868,6 +871,7 @@ mod tests {
            "MailboxHandler",
            "FilterInactiveRegionStatsHandler",
            "CollectStatsHandler",
+            "RemapFlowPeerHandler",
        ];

        for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -888,7 +892,7 @@ mod tests {

        let group = builder.build().unwrap();
        let handlers = group.handlers;
-        assert_eq!(13, handlers.len());
+        assert_eq!(14, handlers.len());

        let names = [
            "ResponseHeaderHandler",
@@ -904,6 +908,7 @@ mod tests {
            "CollectStatsHandler",
            "FilterInactiveRegionStatsHandler",
            "CollectStatsHandler",
+            "RemapFlowPeerHandler",
        ];

        for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -921,7 +926,7 @@ mod tests {

        let group = builder.build().unwrap();
        let handlers = group.handlers;
-        assert_eq!(13, handlers.len());
+        assert_eq!(14, handlers.len());

        let names = [
            "CollectStatsHandler",
@@ -937,6 +942,7 @@ mod tests {
            "MailboxHandler",
            "FilterInactiveRegionStatsHandler",
            "CollectStatsHandler",
+            "RemapFlowPeerHandler",
        ];

        for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -954,7 +960,7 @@ mod tests {

        let group = builder.build().unwrap();
        let handlers = group.handlers;
-        assert_eq!(13, handlers.len());
+        assert_eq!(14, handlers.len());

        let names = [
            "ResponseHeaderHandler",
@@ -970,6 +976,7 @@ mod tests {
            "CollectStatsHandler",
            "FilterInactiveRegionStatsHandler",
            "CollectStatsHandler",
+            "RemapFlowPeerHandler",
        ];

        for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -987,7 +994,7 @@ mod tests {

        let group = builder.build().unwrap();
        let handlers = group.handlers;
-        assert_eq!(13, handlers.len());
+        assert_eq!(14, handlers.len());

        let names = [
            "ResponseHeaderHandler",
@@ -1003,6 +1010,7 @@ mod tests {
            "FilterInactiveRegionStatsHandler",
            "CollectStatsHandler",
            "ResponseHeaderHandler",
+            "RemapFlowPeerHandler",
        ];

        for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -1020,7 +1028,7 @@ mod tests {

        let group = builder.build().unwrap();
        let handlers = group.handlers;
-        assert_eq!(12, handlers.len());
+        assert_eq!(13, handlers.len());

        let names = [
            "ResponseHeaderHandler",
@@ -1035,6 +1043,7 @@ mod tests {
            "CollectStatsHandler",
            "FilterInactiveRegionStatsHandler",
            "CollectStatsHandler",
+            "RemapFlowPeerHandler",
        ];

        for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -1052,7 +1061,7 @@ mod tests {

        let group = builder.build().unwrap();
        let handlers = group.handlers;
-        assert_eq!(12, handlers.len());
+        assert_eq!(13, handlers.len());

        let names = [
            "ResponseHeaderHandler",
@@ -1067,6 +1076,7 @@ mod tests {
            "MailboxHandler",
            "FilterInactiveRegionStatsHandler",
            "ResponseHeaderHandler",
+            "RemapFlowPeerHandler",
        ];

        for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -1084,7 +1094,7 @@ mod tests {

        let group = builder.build().unwrap();
        let handlers = group.handlers;
-        assert_eq!(12, handlers.len());
+        assert_eq!(13, handlers.len());

        let names = [
            "CollectStatsHandler",
@@ -1099,6 +1109,7 @@ mod tests {
            "MailboxHandler",
            "FilterInactiveRegionStatsHandler",
            "CollectStatsHandler",
+            "RemapFlowPeerHandler",
        ];

        for (handler, name) in handlers.iter().zip(names.into_iter()) {
--- a/src/meta-srv/src/handler/collect_cluster_info_handler.rs
+++ b/src/meta-srv/src/handler/collect_cluster_info_handler.rs
@@ -13,7 +13,6 @@
 // limitations under the License.

 use api::v1::meta::{HeartbeatRequest, NodeInfo as PbNodeInfo, Role};
-use common_meta::cluster;
 use common_meta::cluster::{
    DatanodeStatus, FlownodeStatus, FrontendStatus, NodeInfo, NodeInfoKey, NodeStatus,
 };
@@ -42,7 +41,7 @@ impl HeartbeatHandler for CollectFrontendClusterInfoHandler {
        ctx: &mut Context,
        _acc: &mut HeartbeatAccumulator,
    ) -> Result<HandleControl> {
-        let Some((key, peer, info)) = extract_base_info(req, Role::Frontend) else {
+        let Some((key, peer, info)) = extract_base_info(req) else {
            return Ok(HandleControl::Continue);
        };

@@ -75,7 +74,7 @@ impl HeartbeatHandler for CollectFlownodeClusterInfoHandler {
        ctx: &mut Context,
        _acc: &mut HeartbeatAccumulator,
    ) -> Result<HandleControl> {
-        let Some((key, peer, info)) = extract_base_info(req, Role::Flownode) else {
+        let Some((key, peer, info)) = extract_base_info(req) else {
            return Ok(HandleControl::Continue);
        };

@@ -109,7 +108,7 @@ impl HeartbeatHandler for CollectDatanodeClusterInfoHandler {
        ctx: &mut Context,
        acc: &mut HeartbeatAccumulator,
    ) -> Result<HandleControl> {
-        let Some((key, peer, info)) = extract_base_info(req, Role::Datanode) else {
+        let Some((key, peer, info)) = extract_base_info(req) else {
            return Ok(HandleControl::Continue);
        };

@@ -144,16 +143,9 @@ impl HeartbeatHandler for CollectDatanodeClusterInfoHandler {
    }
 }

-fn extract_base_info(
-    req: &HeartbeatRequest,
-    role: Role,
-) -> Option<(NodeInfoKey, Peer, PbNodeInfo)> {
-    let HeartbeatRequest {
-        header, peer, info, ..
-    } = req;
-    let Some(header) = &header else {
-        return None;
-    };
+fn extract_base_info(request: &HeartbeatRequest) -> Option<(NodeInfoKey, Peer, PbNodeInfo)> {
+    let HeartbeatRequest { peer, info, .. } = request;
+    let key = NodeInfoKey::new(request)?;
    let Some(peer) = &peer else {
        return None;
    };
@@ -161,19 +153,7 @@ fn extract_base_info(
        return None;
    };

-    Some((
-        NodeInfoKey {
-            cluster_id: header.cluster_id,
-            role: match role {
-                Role::Datanode => cluster::Role::Datanode,
-                Role::Frontend => cluster::Role::Frontend,
-                Role::Flownode => cluster::Role::Flownode,
-            },
-            node_id: peer.id,
-        },
-        Peer::from(peer.clone()),
-        info.clone(),
-    ))
+    Some((key, Peer::from(peer.clone()), info.clone()))
 }

 async fn put_into_memory_store(ctx: &mut Context, key: NodeInfoKey, value: NodeInfo) -> Result<()> {
--- a/src/meta-srv/src/handler/collect_stats_handler.rs
+++ b/src/meta-srv/src/handler/collect_stats_handler.rs
@@ -21,7 +21,7 @@ use common_meta::key::node_address::{NodeAddressKey, NodeAddressValue};
 use common_meta::key::{MetadataKey, MetadataValue};
 use common_meta::peer::Peer;
 use common_meta::rpc::store::PutRequest;
-use common_telemetry::{error, warn};
+use common_telemetry::{error, info, warn};
 use dashmap::DashMap;
 use snafu::ResultExt;

@@ -185,6 +185,10 @@ async fn rewrite_node_address(ctx: &mut Context, stat: &Stat) {

        match ctx.leader_cached_kv_backend.put(put).await {
            Ok(_) => {
+                info!(
+                    "Successfully updated datanode `NodeAddressValue`: {:?}",
+                    peer
+                );
                // broadcast invalidating cache
                let cache_idents = stat
                    .table_ids()
@@ -200,11 +204,14 @@ async fn rewrite_node_address(ctx: &mut Context, stat: &Stat) {
                }
            }
            Err(e) => {
-                error!(e; "Failed to update NodeAddressValue: {:?}", peer);
+                error!(e; "Failed to update datanode `NodeAddressValue`: {:?}", peer);
            }
        }
    } else {
-        warn!("Failed to serialize NodeAddressValue: {:?}", peer);
+        warn!(
+            "Failed to serialize datanode `NodeAddressValue`: {:?}",
+            peer
+        );
    }
 }

--- a/src/meta-srv/src/handler/remap_flow_peer_handler.rs
+++ b/src/meta-srv/src/handler/remap_flow_peer_handler.rs
@@ -0,0 +1,92 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use api::v1::meta::{HeartbeatRequest, Peer, Role};
+use common_meta::key::node_address::{NodeAddressKey, NodeAddressValue};
+use common_meta::key::{MetadataKey, MetadataValue};
+use common_meta::rpc::store::PutRequest;
+use common_telemetry::{error, info, warn};
+use dashmap::DashMap;
+
+use crate::handler::{HandleControl, HeartbeatAccumulator, HeartbeatHandler};
+use crate::metasrv::Context;
+use crate::Result;
+
+#[derive(Debug, Default)]
+pub struct RemapFlowPeerHandler { // Heartbeat handler state; starts with an empty cache via `Default`.
+    /// Maps a flownode id (`peer.id`) to the highest `node_epoch` observed in its heartbeats so far.
+    epoch_cache: DashMap<u64, u64>,
+}
+
+#[async_trait::async_trait]
+impl HeartbeatHandler for RemapFlowPeerHandler { // Rewrites a flownode's stored address when its heartbeat epoch advances.
+    fn is_acceptable(&self, role: Role) -> bool {
+        role == Role::Flownode // this handler only accepts heartbeats whose role is `Flownode`
+    }

+    async fn handle(
+        &self,
+        req: &HeartbeatRequest,
+        ctx: &mut Context,
+        _acc: &mut HeartbeatAccumulator,
+    ) -> Result<HandleControl> {
+        let Some(peer) = req.peer.as_ref() else { // no peer in the request: nothing to remap
+            return Ok(HandleControl::Continue);
+        };

+        let current_epoch = req.node_epoch;
+        let flow_node_id = peer.id;

+        let refresh = if let Some(mut epoch) = self.epoch_cache.get_mut(&flow_node_id) { // `refresh` == the stored address must be rewritten
+            if current_epoch > *epoch.value() { // strictly newer epoch than the cached one
+                *epoch.value_mut() = current_epoch;
+                true
+            } else {
+                false // same or older epoch: skip the rewrite
+            }
+        } else {
+            self.epoch_cache.insert(flow_node_id, current_epoch); // first heartbeat seen for this id: cache it and refresh
+            true
+        };

+        if refresh {
+            rewrite_node_address(ctx, peer).await; // NOTE(review): cache was already updated above and failures are only logged, so a failed put is not retried for this epoch — confirm intended
+        }

+        Ok(HandleControl::Continue) // every path through this handler returns `Continue`
+    }
+}
+
+async fn rewrite_node_address(ctx: &mut Context, peer: &Peer) { // Puts `peer` as a `NodeAddressValue` under its flownode `NodeAddressKey`; returns nothing and only logs the outcome.
+    let key = NodeAddressKey::with_flownode(peer.id).to_bytes();
+    if let Ok(value) = NodeAddressValue::new(peer.clone().into()).try_as_raw_value() { // the serialization error itself is discarded; the else branch only warns
+        let put = PutRequest {
+            key,
+            value,
+            prev_kv: false, // presumably "do not return the previous key-value" — confirm against `PutRequest`
+        };

+        match ctx.leader_cached_kv_backend.put(put).await {
+            Ok(_) => {
+                info!("Successfully updated flow `NodeAddressValue`: {:?}", peer);
+                // TODO(discord): broadcast invalidating cache to all frontends
+            }
+            Err(e) => {
+                error!(e; "Failed to update flow `NodeAddressValue`: {:?}", peer); // write failure is logged, not propagated
+            }
+        }
+    } else {
+        warn!("Failed to serialize flow `NodeAddressValue`: {:?}", peer);
+    }
+}
--- a/Show More
+++ b/Show More