Compare commits

..

13 Commits

Author   SHA1         Message            Date
liyang   96187618c4   setup qemu action  2025-03-05 13:55:39 +08:00
liyang   57695ea21f   test dev builder   2025-03-05 13:43:41 +08:00
liyang   3b7ff55b7c   test dev builder   2025-03-05 13:34:14 +08:00
liyang   6b6cbe852a   test dev builder   2025-03-04 22:18:05 +08:00
liyang   61c3842db5   test dev builder   2025-03-04 21:05:19 +08:00
liyang   79dfc2f9ea   test dev builder   2025-03-04 20:23:00 +08:00
liyang   f4ec1cf201   test dev builder   2025-03-04 20:12:16 +08:00
liyang   f91a183e83   test dev builder   2025-03-04 20:00:01 +08:00
liyang   f1bd2d51fe   test dev builder   2025-03-04 19:54:30 +08:00
liyang   312c174d89   test dev builder   2025-03-04 19:38:52 +08:00
liyang   9b3157b27d   test dev builder   2025-03-04 19:27:55 +08:00
liyang   7f48184e35   test dev builder   2025-03-04 19:18:42 +08:00
liyang   6456d4bdb5   test dev builder   2025-03-04 19:11:34 +08:00
272 changed files with 7119 additions and 10884 deletions

@@ -48,7 +48,7 @@ runs:
 # The latest version will lead to segmentation fault.
 image: tonistiigi/binfmt:qemu-v7.0.0-28
-- name: Build and push dev-builder-ubuntu image # Build image for amd64 and arm64 platform.
+- name: Build and push dev-builder-ubuntu image
 shell: bash
 if: ${{ inputs.build-dev-builder-ubuntu == 'true' }}
 run: |
@@ -59,7 +59,7 @@ runs:
 IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
 DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
-- name: Build and push dev-builder-centos image # Only build image for amd64 platform.
+- name: Build and push dev-builder-centos image
 shell: bash
 if: ${{ inputs.build-dev-builder-centos == 'true' }}
 run: |
@@ -80,3 +80,4 @@ runs:
 IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
 IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
 DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
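For context, the hunk above keeps the binfmt image pinned at tonistiigi/binfmt:qemu-v7.0.0-28 because, per the comment, the latest version segfaults. A hedged sketch of the equivalent manual QEMU registration outside of Actions (standard docker/buildx usage; only the image tag is taken from this diff):

# Register QEMU binfmt handlers so an amd64 host can run arm64 build stages.
docker run --privileged --rm tonistiigi/binfmt:qemu-v7.0.0-28 --install all
# Confirm linux/arm64 now appears among the builder's supported platforms.
docker buildx ls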

@@ -56,7 +56,7 @@ runs:
 - name: Start EC2 runner
 if: startsWith(inputs.runner, 'ec2')
-uses: machulav/ec2-github-runner@v2.3.8
+uses: machulav/ec2-github-runner@v2
 id: start-linux-arm64-ec2-runner
 with:
 mode: start

@@ -33,7 +33,7 @@ runs:
 - name: Stop EC2 runner
 if: ${{ inputs.label && inputs.ec2-instance-id }}
-uses: machulav/ec2-github-runner@v2.3.8
+uses: machulav/ec2-github-runner@v2
 with:
 mode: stop
 label: ${{ inputs.label }}

@@ -14,7 +14,7 @@ name: Build API docs
 jobs:
 apidoc:
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 steps:
 - uses: actions/checkout@v4
 with:

@@ -16,11 +16,11 @@ on:
 description: The runner uses to build linux-amd64 artifacts
 default: ec2-c6i.4xlarge-amd64
 options:
-- ubuntu-22.04
-- ubuntu-22.04-8-cores
-- ubuntu-22.04-16-cores
-- ubuntu-22.04-32-cores
-- ubuntu-22.04-64-cores
+- ubuntu-20.04
+- ubuntu-20.04-8-cores
+- ubuntu-20.04-16-cores
+- ubuntu-20.04-32-cores
+- ubuntu-20.04-64-cores
 - ec2-c6i.xlarge-amd64 # 4C8G
 - ec2-c6i.2xlarge-amd64 # 8C16G
 - ec2-c6i.4xlarge-amd64 # 16C32G
@@ -83,7 +83,7 @@ jobs:
 allocate-runners:
 name: Allocate runners
 if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 outputs:
 linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
 linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -218,7 +218,7 @@ jobs:
 build-linux-amd64-artifacts,
 build-linux-arm64-artifacts,
 ]
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 outputs:
 build-result: ${{ steps.set-build-result.outputs.build-result }}
 steps:
@@ -251,7 +251,7 @@ jobs:
 allocate-runners,
 release-images-to-dockerhub,
 ]
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 continue-on-error: true
 steps:
 - uses: actions/checkout@v4
@@ -283,7 +283,7 @@ jobs:
 name: Stop linux-amd64 runner
 # Only run this job when the runner is allocated.
 if: ${{ always() }}
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 needs: [
 allocate-runners,
 build-linux-amd64-artifacts,
@@ -309,7 +309,7 @@ jobs:
 name: Stop linux-arm64 runner
 # Only run this job when the runner is allocated.
 if: ${{ always() }}
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 needs: [
 allocate-runners,
 build-linux-arm64-artifacts,
@@ -337,7 +337,7 @@ jobs:
 needs: [
 release-images-to-dockerhub
 ]
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 permissions:
 issues: write

@@ -23,7 +23,7 @@ concurrency:
 jobs:
 check-typos-and-docs:
 name: Check typos and docs
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 steps:
 - uses: actions/checkout@v4
 with:
@@ -36,7 +36,7 @@ jobs:
 || (echo "'config/config.md' is not up-to-date, please run 'make config-docs'." && exit 1)
 license-header-check:
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 name: Check License Header
 steps:
 - uses: actions/checkout@v4
@@ -49,7 +49,7 @@ jobs:
 runs-on: ${{ matrix.os }}
 strategy:
 matrix:
-os: [ ubuntu-latest ]
+os: [ ubuntu-20.04 ]
 timeout-minutes: 60
 steps:
 - uses: actions/checkout@v4
@@ -72,7 +72,7 @@ jobs:
 toml:
 name: Toml Check
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 timeout-minutes: 60
 steps:
 - uses: actions/checkout@v4
@@ -89,7 +89,7 @@ jobs:
 runs-on: ${{ matrix.os }}
 strategy:
 matrix:
-os: [ ubuntu-latest ]
+os: [ ubuntu-20.04 ]
 timeout-minutes: 60
 steps:
 - uses: actions/checkout@v4
@@ -248,7 +248,7 @@ jobs:
 runs-on: ${{ matrix.os }}
 strategy:
 matrix:
-os: [ ubuntu-latest ]
+os: [ ubuntu-20.04 ]
 timeout-minutes: 60
 steps:
 - uses: actions/checkout@v4
@@ -568,7 +568,7 @@ jobs:
 runs-on: ${{ matrix.os }}
 strategy:
 matrix:
-os: [ ubuntu-latest ]
+os: [ ubuntu-20.04 ]
 mode:
 - name: "Basic"
 opts: ""
@@ -607,7 +607,7 @@ jobs:
 fmt:
 name: Rustfmt
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 timeout-minutes: 60
 steps:
 - uses: actions/checkout@v4
@@ -624,7 +624,7 @@ jobs:
 clippy:
 name: Clippy
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 timeout-minutes: 60
 steps:
 - uses: actions/checkout@v4
@@ -710,7 +710,7 @@ jobs:
 coverage:
 if: github.event_name == 'merge_group'
-runs-on: ubuntu-22.04-8-cores
+runs-on: ubuntu-20.04-8-cores
 timeout-minutes: 60
 steps:
 - uses: actions/checkout@v4
@@ -770,7 +770,7 @@ jobs:
 # compat:
 # name: Compatibility Test
 # needs: build
-# runs-on: ubuntu-22.04
+# runs-on: ubuntu-20.04
 # timeout-minutes: 60
 # steps:
 # - uses: actions/checkout@v4

@@ -9,7 +9,7 @@ concurrency:
 jobs:
 docbot:
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 permissions:
 pull-requests: write
 contents: read

@@ -31,7 +31,7 @@ name: CI
 jobs:
 typos:
 name: Spell Check with Typos
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 steps:
 - uses: actions/checkout@v4
 with:
@@ -39,7 +39,7 @@ jobs:
 - uses: crate-ci/typos@master
 license-header-check:
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 name: Check License Header
 steps:
 - uses: actions/checkout@v4
@@ -49,29 +49,29 @@ jobs:
 check:
 name: Check
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 steps:
 - run: 'echo "No action required"'
 fmt:
 name: Rustfmt
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 steps:
 - run: 'echo "No action required"'
 clippy:
 name: Clippy
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 steps:
 - run: 'echo "No action required"'
 coverage:
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 steps:
 - run: 'echo "No action required"'
 test:
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 steps:
 - run: 'echo "No action required"'
@@ -80,7 +80,7 @@ jobs:
 runs-on: ${{ matrix.os }}
 strategy:
 matrix:
-os: [ ubuntu-latest ]
+os: [ ubuntu-20.04 ]
 mode:
 - name: "Basic"
 - name: "Remote WAL"


@@ -1,52 +0,0 @@
name: Check Grafana Panels
on:
pull_request:
branches:
- main
paths:
- 'grafana/**' # Trigger only when files under the grafana/ directory change
jobs:
check-panels:
runs-on: ubuntu-latest
steps:
# Check out the repository
- name: Checkout repository
uses: actions/checkout@v4
# Install jq (required for the script)
- name: Install jq
run: sudo apt-get install -y jq
# Make the check.sh script executable
- name: Make check.sh executable
run: chmod +x grafana/check.sh
# Run the check.sh script
- name: Run check.sh
run: ./grafana/check.sh
# Only run summary.sh for pull_request events (not for merge queues or final pushes)
- name: Check if this is a pull request
id: check-pr
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "is_pull_request=true" >> $GITHUB_OUTPUT
else
echo "is_pull_request=false" >> $GITHUB_OUTPUT
fi
# Make the summary.sh script executable
- name: Make summary.sh executable
if: steps.check-pr.outputs.is_pull_request == 'true'
run: chmod +x grafana/summary.sh
# Run the summary.sh script and add its output to the GitHub Job Summary
- name: Run summary.sh and add to Job Summary
if: steps.check-pr.outputs.is_pull_request == 'true'
run: |
SUMMARY=$(./grafana/summary.sh)
echo "### Summary of Grafana Panels" >> $GITHUB_STEP_SUMMARY
echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY
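The workflow in the hunk above (present on only one side of this comparison) does little more than wire up jq and the two helper scripts under grafana/. A hedged sketch of running the same checks locally, assuming a checkout of the repository root and the paths the workflow uses:

# Same steps the workflow performs, run from the repository root.
sudo apt-get install -y jq
chmod +x grafana/check.sh grafana/summary.sh
./grafana/check.sh     # fails if a stats/timeseries panel is missing a description
./grafana/summary.sh   # prints the markdown summary table of panels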

@@ -14,11 +14,11 @@ on:
 description: The runner uses to build linux-amd64 artifacts
 default: ec2-c6i.4xlarge-amd64
 options:
-- ubuntu-22.04
-- ubuntu-22.04-8-cores
-- ubuntu-22.04-16-cores
-- ubuntu-22.04-32-cores
-- ubuntu-22.04-64-cores
+- ubuntu-20.04
+- ubuntu-20.04-8-cores
+- ubuntu-20.04-16-cores
+- ubuntu-20.04-32-cores
+- ubuntu-20.04-64-cores
 - ec2-c6i.xlarge-amd64 # 4C8G
 - ec2-c6i.2xlarge-amd64 # 8C16G
 - ec2-c6i.4xlarge-amd64 # 16C32G
@@ -70,7 +70,7 @@ jobs:
 allocate-runners:
 name: Allocate runners
 if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 outputs:
 linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
 linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -182,7 +182,7 @@ jobs:
 build-linux-amd64-artifacts,
 build-linux-arm64-artifacts,
 ]
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 outputs:
 nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }}
 steps:
@@ -214,7 +214,7 @@ jobs:
 allocate-runners,
 release-images-to-dockerhub,
 ]
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 # When we push to ACR, it's easy to fail due to some unknown network issues.
 # However, we don't want to fail the whole workflow because of this.
 # The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
@@ -249,7 +249,7 @@ jobs:
 name: Stop linux-amd64 runner
 # Only run this job when the runner is allocated.
 if: ${{ always() }}
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 needs: [
 allocate-runners,
 build-linux-amd64-artifacts,
@@ -275,7 +275,7 @@ jobs:
 name: Stop linux-arm64 runner
 # Only run this job when the runner is allocated.
 if: ${{ always() }}
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 needs: [
 allocate-runners,
 build-linux-arm64-artifacts,
@@ -303,7 +303,7 @@ jobs:
 needs: [
 release-images-to-dockerhub
 ]
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 permissions:
 issues: write
 env:

@@ -13,7 +13,7 @@ jobs:
 sqlness-test:
 name: Run sqlness test
 if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
-runs-on: ubuntu-latest
+runs-on: ubuntu-22.04
 steps:
 - name: Checkout
 uses: actions/checkout@v4
@@ -133,7 +133,7 @@ jobs:
 name: Check status
 needs: [sqlness-test, sqlness-windows, test-on-windows]
 if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 outputs:
 check-result: ${{ steps.set-check-result.outputs.check-result }}
 steps:
@@ -146,7 +146,7 @@ jobs:
 if: ${{ github.repository == 'GreptimeTeam/greptimedb' && always() }} # Not requiring successful dependent jobs, always run.
 name: Send notification to Greptime team
 needs: [check-status]
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 env:
 SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
 steps:

@@ -29,7 +29,7 @@ jobs:
 release-dev-builder-images:
 name: Release dev builder images
 if: ${{ inputs.release_dev_builder_ubuntu_image || inputs.release_dev_builder_centos_image || inputs.release_dev_builder_android_image }} # Only manually trigger this job.
-runs-on: ubuntu-latest
+runs-on: ubuntu-22.04-16-cores
 outputs:
 version: ${{ steps.set-version.outputs.version }}
 steps:
@@ -63,7 +63,7 @@ jobs:
 release-dev-builder-images-ecr:
 name: Release dev builder images to AWS ECR
-runs-on: ubuntu-latest
+runs-on: ubuntu-22.04
 needs: [
 release-dev-builder-images
 ]
@@ -148,7 +148,7 @@ jobs:
 release-dev-builder-images-cn: # Note: Be careful issue: https://github.com/containers/skopeo/issues/1874 and we decide to use the latest stable skopeo container.
 name: Release dev builder images to CN region
-runs-on: ubuntu-latest
+runs-on: ubuntu-22.04
 needs: [
 release-dev-builder-images
 ]

@@ -18,11 +18,11 @@ on:
 description: The runner uses to build linux-amd64 artifacts
 default: ec2-c6i.4xlarge-amd64
 options:
-- ubuntu-22.04
-- ubuntu-22.04-8-cores
-- ubuntu-22.04-16-cores
-- ubuntu-22.04-32-cores
-- ubuntu-22.04-64-cores
+- ubuntu-20.04
+- ubuntu-20.04-8-cores
+- ubuntu-20.04-16-cores
+- ubuntu-20.04-32-cores
+- ubuntu-20.04-64-cores
 - ec2-c6i.xlarge-amd64 # 4C8G
 - ec2-c6i.2xlarge-amd64 # 8C16G
 - ec2-c6i.4xlarge-amd64 # 16C32G
@@ -97,7 +97,7 @@ jobs:
 allocate-runners:
 name: Allocate runners
 if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 outputs:
 linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
 linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -299,7 +299,7 @@ jobs:
 build-linux-amd64-artifacts,
 build-linux-arm64-artifacts,
 ]
-runs-on: ubuntu-latest
+runs-on: ubuntu-2004-16-cores
 outputs:
 build-image-result: ${{ steps.set-build-image-result.outputs.build-image-result }}
 steps:
@@ -335,7 +335,7 @@ jobs:
 build-windows-artifacts,
 release-images-to-dockerhub,
 ]
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 # When we push to ACR, it's easy to fail due to some unknown network issues.
 # However, we don't want to fail the whole workflow because of this.
 # The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
@@ -377,7 +377,7 @@ jobs:
 build-windows-artifacts,
 release-images-to-dockerhub,
 ]
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 steps:
 - uses: actions/checkout@v4
 with:
@@ -396,7 +396,7 @@ jobs:
 name: Stop linux-amd64 runner
 # Only run this job when the runner is allocated.
 if: ${{ always() }}
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 needs: [
 allocate-runners,
 build-linux-amd64-artifacts,
@@ -422,7 +422,7 @@ jobs:
 name: Stop linux-arm64 runner
 # Only run this job when the runner is allocated.
 if: ${{ always() }}
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 needs: [
 allocate-runners,
 build-linux-arm64-artifacts,
@@ -448,7 +448,7 @@ jobs:
 name: Bump doc version
 if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }}
 needs: [allocate-runners]
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 # Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
 permissions:
 issues: write # Allows the action to create issues for cyborg.
@@ -475,7 +475,7 @@ jobs:
 build-macos-artifacts,
 build-windows-artifacts,
 ]
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 # Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
 permissions:
 issues: write # Allows the action to create issues for cyborg.

@@ -13,7 +13,7 @@ concurrency:
 jobs:
 check:
-runs-on: ubuntu-latest
+runs-on: ubuntu-20.04
 timeout-minutes: 10
 steps:
 - uses: actions/checkout@v4

Cargo.lock (generated)

@@ -4167,7 +4167,6 @@ dependencies = [
 "bytes",
 "cache",
 "catalog",
-"chrono",
 "client",
 "common-base",
 "common-catalog",
@@ -4702,7 +4701,7 @@ dependencies = [
 [[package]]
 name = "greptime-proto"
 version = "0.1.0"
-source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486#d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486"
+source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=072ce580502e015df1a6b03a185b60309a7c2a7a#072ce580502e015df1a6b03a185b60309a7c2a7a"
 dependencies = [
 "prost 0.13.3",
 "serde",
@@ -5567,7 +5566,6 @@ dependencies = [
 "rand",
 "regex",
 "regex-automata 0.4.8",
-"roaring",
 "serde",
 "serde_json",
 "snafu 0.8.5",
@@ -5899,15 +5897,15 @@ dependencies = [
 [[package]]
 name = "jsonpath-rust"
-version = "0.7.5"
+version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c00ae348f9f8fd2d09f82a98ca381c60df9e0820d8d79fce43e649b4dc3128b"
+checksum = "69a61b87f6a55cc6c28fed5739dd36b9642321ce63e4a5e4a4715d69106f4a10"
 dependencies = [
 "pest",
 "pest_derive",
 "regex",
 "serde_json",
-"thiserror 2.0.12",
+"thiserror 1.0.64",
 ]
 [[package]]
@@ -8272,7 +8270,7 @@ dependencies = [
 "rand",
 "ring",
 "rust_decimal",
-"thiserror 2.0.12",
+"thiserror 2.0.6",
 "tokio",
 "tokio-rustls 0.26.0",
 "tokio-util",
@@ -8384,7 +8382,7 @@ dependencies = [
 "greptime-proto",
 "itertools 0.10.5",
 "jsonb",
-"jsonpath-rust 0.7.5",
+"jsonpath-rust 0.7.3",
 "lazy_static",
 "moka",
 "once_cell",
@@ -8762,7 +8760,6 @@ dependencies = [
 "common-recordbatch",
 "common-telemetry",
 "datafusion",
-"datafusion-common",
 "datafusion-expr",
 "datatypes",
 "futures",
@@ -8776,9 +8773,8 @@ dependencies = [
 [[package]]
 name = "promql-parser"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c6b1429bdd199d53bd58b745075c1652efedbe2746e5d4f0d56d3184dda48ec"
+version = "0.4.3"
+source = "git+https://github.com/GreptimeTeam/promql-parser.git?rev=27abb8e16003a50c720f00d6c85f41f5fa2a2a8e#27abb8e16003a50c720f00d6c85f41f5fa2a2a8e"
 dependencies = [
 "cfgrammar",
 "chrono",
@@ -9636,16 +9632,6 @@ dependencies = [
 "syn 1.0.109",
 ]
-[[package]]
-name = "roaring"
-version = "0.10.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "41589aba99537475bf697f2118357cad1c31590c5a1b9f6d9fc4ad6d07503661"
-dependencies = [
-"bytemuck",
-"byteorder",
-]
 [[package]]
 name = "robust"
 version = "1.1.0"
@@ -11065,7 +11051,7 @@ dependencies = [
 "serde_json",
 "sha2",
 "smallvec",
-"thiserror 2.0.12",
+"thiserror 2.0.6",
 "tokio",
 "tokio-stream",
 "tracing",
@@ -11150,7 +11136,7 @@ dependencies = [
 "smallvec",
 "sqlx-core",
 "stringprep",
-"thiserror 2.0.12",
+"thiserror 2.0.6",
 "tracing",
 "whoami",
 ]
@@ -11188,7 +11174,7 @@ dependencies = [
 "smallvec",
 "sqlx-core",
 "stringprep",
-"thiserror 2.0.12",
+"thiserror 2.0.6",
 "tracing",
 "whoami",
 ]
@@ -11969,11 +11955,11 @@ dependencies = [
 [[package]]
 name = "thiserror"
-version = "2.0.12"
+version = "2.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708"
+checksum = "8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47"
 dependencies = [
-"thiserror-impl 2.0.12",
+"thiserror-impl 2.0.6",
 ]
 [[package]]
@@ -11989,9 +11975,9 @@ dependencies = [
 [[package]]
 name = "thiserror-impl"
-version = "2.0.12"
+version = "2.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d"
+checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312"
 dependencies = [
 "proc-macro2",
 "quote",

@@ -129,7 +129,7 @@ etcd-client = "0.14"
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "072ce580502e015df1a6b03a185b60309a7c2a7a" }
 hex = "0.4"
 http = "1"
 humantime = "2.1"
@@ -160,7 +160,9 @@ parquet = { version = "53.0.0", default-features = false, features = ["arrow", "
 paste = "1.0"
 pin-project = "1.0"
 prometheus = { version = "0.13.3", features = ["process"] }
-promql-parser = { version = "0.5", features = ["ser"] }
+promql-parser = { git = "https://github.com/GreptimeTeam/promql-parser.git", features = [
+"ser",
+], rev = "27abb8e16003a50c720f00d6c85f41f5fa2a2a8e" }
 prost = "0.13"
 raft-engine = { version = "0.4.1", default-features = false }
 rand = "0.8"

@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
 IMAGE_REGISTRY ?= docker.io
 IMAGE_NAMESPACE ?= greptime
 IMAGE_TAG ?= latest
-DEV_BUILDER_IMAGE_TAG ?= 2024-12-25-a71b93dd-20250305072908
+DEV_BUILDER_IMAGE_TAG ?= 2024-12-25-9d0fa5d5-20250124085746
 BUILDX_MULTI_PLATFORM_BUILD ?= false
 BUILDX_BUILDER_NAME ?= gtbuilder
 BASE_IMAGE ?= ubuntu
@@ -61,7 +61,7 @@ ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), all)
 else ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), amd64)
 BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64 --push
 else ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), arm64)
 BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/arm64 --push
 else
 BUILDX_MULTI_PLATFORM_BUILD_OPTS := -o type=docker
 endif
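The Makefile hunk above only changes the default DEV_BUILDER_IMAGE_TAG; the BUILDX_MULTI_PLATFORM_BUILD branches themselves are shown as unchanged context. As a reading aid, those branches map onto docker buildx roughly as in this sketch (the image name "example" and the build context "." are placeholders, not taken from the diff):

# BUILDX_MULTI_PLATFORM_BUILD=all corresponds to a multi-platform build that is pushed:
docker buildx build --platform linux/amd64,linux/arm64 --push -t "$IMAGE_REGISTRY/$IMAGE_NAMESPACE/example:$IMAGE_TAG" .
# amd64/arm64 build and push a single platform; the default (false) only loads a local image:
docker buildx build -o type=docker -t "example:$IMAGE_TAG" .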

@@ -1,4 +1,4 @@
-FROM ubuntu:22.04 as builder
+FROM ubuntu:20.04 as builder
 ARG CARGO_PROFILE
 ARG FEATURES

@@ -1,4 +1,4 @@
-FROM ubuntu:latest
+FROM ubuntu:22.04
 # The binary name of GreptimeDB executable.
 # Defaults to "greptime", but sometimes in other projects it might be different.

@@ -41,7 +41,7 @@ RUN mv protoc3/include/* /usr/local/include/
 # and the repositories are pulled from trusted sources (still us, of course). Doing so does not violate the intention
 # of the Git's addition to the "safe.directory" at the first place (see the commit message here:
 # https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9).
 # There's also another solution to this, that we add the desired submodules to the safe directory, instead of using
 # wildcard here. However, that requires the git's config files and the submodules all owned by the very same user.
 # It's troublesome to do this since the dev build runs in Docker, which is under user "root"; while outside the Docker,
 # it can be a different user that have prepared the submodules.
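The comment block above (unchanged context in this hunk) explains why the dev-builder image whitelists every directory for Git instead of listing each submodule. The wildcard configuration it refers to is conventionally a one-liner like the following (an illustration, not quoted from the Dockerfile):

# Treat every path as a safe Git directory inside the container,
# so "dubious ownership" checks do not break submodule operations run as root.
git config --global --add safe.directory '*'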


@@ -1,19 +0,0 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
# Use jq to check for panels with empty or missing descriptions
invalid_panels=$(cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels[]
| select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))
')
# Check if any invalid panels were found
if [[ -n "$invalid_panels" ]]; then
echo "Error: The following panels have empty or missing descriptions:"
echo "$invalid_panels"
exit 1
else
echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
exit 0
fi

File diff suppressed because it is too large.


@@ -1,11 +0,0 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
echo '| Title | Description | Expressions |
|---|---|---|'
cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels |
map(select(.type == "stat" or .type == "timeseries")) |
.[] | "| \(.title) | \(.description | gsub("\n"; "<br>")) | \(.targets | map(.expr // .rawSql | "`\(.|gsub("\n"; "<br>"))`") | join("<br>")) |"
'

@@ -38,7 +38,7 @@ use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
 use session::context::{Channel, QueryContext};
 use snafu::prelude::*;
 use table::dist_table::DistTable;
-use table::metadata::TableId;
 use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
 use table::table_name::TableName;
 use table::TableRef;
@@ -287,28 +286,6 @@ impl CatalogManager for KvBackendCatalogManager {
 return Ok(None);
 }
-async fn tables_by_ids(
-&self,
-catalog: &str,
-schema: &str,
-table_ids: &[TableId],
-) -> Result<Vec<TableRef>> {
-let table_info_values = self
-.table_metadata_manager
-.table_info_manager()
-.batch_get(table_ids)
-.await
-.context(TableMetadataManagerSnafu)?;
-let tables = table_info_values
-.into_values()
-.filter(|t| t.table_info.catalog_name == catalog && t.table_info.schema_name == schema)
-.map(build_table)
-.collect::<Result<Vec<_>>>()?;
-Ok(tables)
-}
 fn tables<'a>(
 &'a self,
 catalog: &'a str,

@@ -87,14 +87,6 @@ pub trait CatalogManager: Send + Sync {
 query_ctx: Option<&QueryContext>,
 ) -> Result<Option<TableRef>>;
-/// Returns the tables by table ids.
-async fn tables_by_ids(
-&self,
-catalog: &str,
-schema: &str,
-table_ids: &[TableId],
-) -> Result<Vec<TableRef>>;
 /// Returns all tables with a stream by catalog and schema.
 fn tables<'a>(
 &'a self,

@@ -14,7 +14,7 @@
 use std::any::Any;
 use std::collections::hash_map::Entry;
-use std::collections::{HashMap, HashSet};
+use std::collections::HashMap;
 use std::sync::{Arc, RwLock, Weak};
 use async_stream::{stream, try_stream};
@@ -28,7 +28,6 @@ use common_meta::kv_backend::memory::MemoryKvBackend;
 use futures_util::stream::BoxStream;
 use session::context::QueryContext;
 use snafu::OptionExt;
-use table::metadata::TableId;
 use table::TableRef;
 use crate::error::{CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu};
@@ -144,33 +143,6 @@ impl CatalogManager for MemoryCatalogManager {
 Ok(result)
 }
-async fn tables_by_ids(
-&self,
-catalog: &str,
-schema: &str,
-table_ids: &[TableId],
-) -> Result<Vec<TableRef>> {
-let catalogs = self.catalogs.read().unwrap();
-let schemas = catalogs.get(catalog).context(CatalogNotFoundSnafu {
-catalog_name: catalog,
-})?;
-let tables = schemas
-.get(schema)
-.context(SchemaNotFoundSnafu { catalog, schema })?;
-let filter_ids: HashSet<_> = table_ids.iter().collect();
-// It is very inefficient, but we do not need to optimize it since it will not be called in `MemoryCatalogManager`.
-let tables = tables
-.values()
-.filter(|t| filter_ids.contains(&t.table_info().table_id()))
-.cloned()
-.collect::<Vec<_>>();
-Ok(tables)
-}
 fn tables<'a>(
 &'a self,
 catalog: &'a str,

@@ -16,6 +16,7 @@
 mod client;
 pub mod client_manager;
+#[cfg(feature = "testing")]
 mod database;
 pub mod error;
 pub mod flow;
@@ -33,6 +34,7 @@ pub use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
 use snafu::OptionExt;
 pub use self::client::Client;
+#[cfg(feature = "testing")]
 pub use self::database::Database;
 pub use self::error::{Error, Result};
 use crate::error::{IllegalDatabaseResponseSnafu, ServerSnafu};

@@ -287,6 +287,7 @@ impl StartCommand {
 .await
 .context(StartDatanodeSnafu)?;
+let cluster_id = 0; // TODO(hl): read from config
 let member_id = opts
 .node_id
 .context(MissingConfigSnafu { msg: "'node_id'" })?;
@@ -295,10 +296,13 @@ impl StartCommand {
 msg: "'meta_client_options'",
 })?;
-let meta_client =
-meta_client::create_meta_client(MetaClientType::Datanode { member_id }, meta_config)
-.await
-.context(MetaClientInitSnafu)?;
+let meta_client = meta_client::create_meta_client(
+cluster_id,
+MetaClientType::Datanode { member_id },
+meta_config,
+)
+.await
+.context(MetaClientInitSnafu)?;
 let meta_backend = Arc::new(MetaKvBackend {
 client: meta_client.clone(),

@@ -32,7 +32,7 @@ use common_meta::key::TableMetadataManager;
 use common_telemetry::info;
 use common_telemetry::logging::TracingOptions;
 use common_version::{short_version, version};
-use flow::{FlownodeBuilder, FlownodeInstance, FrontendClient, FrontendInvoker};
+use flow::{FlownodeBuilder, FlownodeInstance, FrontendInvoker};
 use meta_client::{MetaClientOptions, MetaClientType};
 use servers::Mode;
 use snafu::{OptionExt, ResultExt};
@@ -241,6 +241,9 @@ impl StartCommand {
 let mut opts = opts.component;
 opts.grpc.detect_server_addr();
+// TODO(discord9): make it not optionale after cluster id is required
+let cluster_id = opts.cluster_id.unwrap_or(0);
 let member_id = opts
 .node_id
 .context(MissingConfigSnafu { msg: "'node_id'" })?;
@@ -249,10 +252,13 @@ impl StartCommand {
 msg: "'meta_client_options'",
 })?;
-let meta_client =
-meta_client::create_meta_client(MetaClientType::Flownode { member_id }, meta_config)
-.await
-.context(MetaClientInitSnafu)?;
+let meta_client = meta_client::create_meta_client(
+cluster_id,
+MetaClientType::Flownode { member_id },
+meta_config,
+)
+.await
+.context(MetaClientInitSnafu)?;
 let cache_max_capacity = meta_config.metadata_cache_max_capacity;
 let cache_ttl = meta_config.metadata_cache_ttl;
@@ -311,8 +317,6 @@ impl StartCommand {
 Arc::new(executor),
 );
-let frontend_client = FrontendClient::from_meta_client(meta_client.clone());
 let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
 let flownode_builder = FlownodeBuilder::new(
 opts,
@@ -320,7 +324,6 @@ impl StartCommand {
 table_metadata_manager,
 catalog_manager.clone(),
 flow_metadata_manager,
-Arc::new(frontend_client),
 )
 .with_heartbeat_task(heartbeat_task);

@@ -295,10 +295,14 @@ impl StartCommand {
 let cache_ttl = meta_client_options.metadata_cache_ttl;
 let cache_tti = meta_client_options.metadata_cache_tti;
-let meta_client =
-meta_client::create_meta_client(MetaClientType::Frontend, meta_client_options)
-.await
-.context(MetaClientInitSnafu)?;
+let cluster_id = 0; // (TODO: jeremy): It is currently a reserved field and has not been enabled.
+let meta_client = meta_client::create_meta_client(
+cluster_id,
+MetaClientType::Frontend,
+meta_client_options,
+)
+.await
+.context(MetaClientInitSnafu)?;
 // TODO(discord9): add helper function to ease the creation of cache registry&such
 let cached_meta_backend =

@@ -54,10 +54,7 @@ use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, Sto
 use datanode::datanode::{Datanode, DatanodeBuilder};
 use datanode::region_server::RegionServer;
 use file_engine::config::EngineConfig as FileEngineConfig;
-use flow::{
-FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendClient,
-FrontendInvoker,
-};
+use flow::{FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendInvoker};
 use frontend::frontend::FrontendOptions;
 use frontend::instance::builder::FrontendBuilder;
 use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
@@ -536,16 +533,12 @@ impl StartCommand {
 flow: opts.flow.clone(),
 ..Default::default()
 };
-let fe_server_addr = fe_opts.grpc.bind_addr.clone();
-let frontend_client = FrontendClient::from_static_grpc_addr(fe_server_addr);
 let flow_builder = FlownodeBuilder::new(
 flownode_options,
 plugins.clone(),
 table_metadata_manager.clone(),
 catalog_manager.clone(),
 flow_metadata_manager.clone(),
-Arc::new(frontend_client),
 );
 let flownode = Arc::new(
 flow_builder

@@ -130,10 +130,3 @@ pub const SEMANTIC_TYPE_TIME_INDEX: &str = "TIMESTAMP";
 pub fn is_readonly_schema(schema: &str) -> bool {
 matches!(schema, INFORMATION_SCHEMA_NAME)
 }
-// ---- special table and fields ----
-pub const TRACE_ID_COLUMN: &str = "trace_id";
-pub const SPAN_ID_COLUMN: &str = "span_id";
-pub const SPAN_NAME_COLUMN: &str = "span_name";
-pub const PARENT_SPAN_ID_COLUMN: &str = "parent_span_id";
-// ---- End of special table and fields ----

@@ -12,16 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//! Two UDAFs are implemented for HyperLogLog:
-//!
-//! - `hll`: Accepts a string column and aggregates the values into a
-//! HyperLogLog state.
-//! - `hll_merge`: Accepts a binary column of states generated by `hll`
-//! and merges them into a single state.
-//!
-//! The states can be then used to estimate the cardinality of the
-//! values in the column by `hll_count` UDF.
 use std::sync::Arc;
 use common_query::prelude::*;

@@ -12,12 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//! Implementation of the `uddsketch_state` UDAF that generate the state of
-//! UDDSketch for a given set of values.
-//!
-//! The generated state can be used to compute approximate quantiles using
-//! `uddsketch_calc` UDF.
 use std::sync::Arc;
 use common_query::prelude::*;

@@ -12,16 +12,24 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//! # Deprecate Warning:
-//!
-//! This module is deprecated and will be removed in the future.
-//! All UDAF implementation here are not maintained and should
-//! not be used before they are refactored into the `src/aggr`
-//! version.
+mod argmax;
+mod argmin;
+mod diff;
+mod mean;
+mod polyval;
+mod scipy_stats_norm_cdf;
+mod scipy_stats_norm_pdf;
 use std::sync::Arc;
+pub use argmax::ArgmaxAccumulatorCreator;
+pub use argmin::ArgminAccumulatorCreator;
 use common_query::logical_plan::AggregateFunctionCreatorRef;
+pub use diff::DiffAccumulatorCreator;
+pub use mean::MeanAccumulatorCreator;
+pub use polyval::PolyvalAccumulatorCreator;
+pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
+pub use scipy_stats_norm_pdf::ScipyStatsNormPdfAccumulatorCreator;
 use crate::function_registry::FunctionRegistry;
 use crate::scalars::vector::product::VectorProductCreator;
@@ -68,22 +76,31 @@ pub(crate) struct AggregateFunctions;
 impl AggregateFunctions {
 pub fn register(registry: &FunctionRegistry) {
-registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
-"vec_sum",
-1,
-Arc::new(|| Arc::new(VectorSumCreator::default())),
-)));
-registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
-"vec_product",
-1,
-Arc::new(|| Arc::new(VectorProductCreator::default())),
-)));
+macro_rules! register_aggr_func {
+($name :expr, $arg_count :expr, $creator :ty) => {
+registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
+$name,
+$arg_count,
+Arc::new(|| Arc::new(<$creator>::default())),
+)));
+};
+}
+register_aggr_func!("diff", 1, DiffAccumulatorCreator);
+register_aggr_func!("mean", 1, MeanAccumulatorCreator);
+register_aggr_func!("polyval", 2, PolyvalAccumulatorCreator);
+register_aggr_func!("argmax", 1, ArgmaxAccumulatorCreator);
+register_aggr_func!("argmin", 1, ArgminAccumulatorCreator);
+register_aggr_func!("scipystatsnormcdf", 2, ScipyStatsNormCdfAccumulatorCreator);
+register_aggr_func!("scipystatsnormpdf", 2, ScipyStatsNormPdfAccumulatorCreator);
+register_aggr_func!("vec_sum", 1, VectorSumCreator);
+register_aggr_func!("vec_product", 1, VectorProductCreator);
 #[cfg(feature = "geo")]
-registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
+register_aggr_func!(
 "json_encode_path",
 3,
-Arc::new(|| Arc::new(super::geo::encoding::JsonPathEncodeFunctionCreator::default())),
-)));
+super::geo::encoding::JsonPathEncodeFunctionCreator
+);
 }
 }


@@ -0,0 +1,208 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::cmp::Ordering;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::{LogicalPrimitiveType, WrapperType};
use datatypes::vectors::{ConstantVector, Helper};
use datatypes::with_match_primitive_type_id;
use snafu::ensure;
// https://numpy.org/doc/stable/reference/generated/numpy.argmax.html
// return the index of the max value
#[derive(Debug, Default)]
pub struct Argmax<T> {
max: Option<T>,
n: u64,
}
impl<T> Argmax<T>
where
T: PartialOrd + Copy,
{
fn update(&mut self, value: T, index: u64) {
if let Some(Ordering::Less) = self.max.partial_cmp(&Some(value)) {
self.max = Some(value);
self.n = index;
}
}
}
impl<T> Accumulator for Argmax<T>
where
T: WrapperType + PartialOrd,
{
fn state(&self) -> Result<Vec<Value>> {
match self.max {
Some(max) => Ok(vec![max.into(), self.n.into()]),
_ => Ok(vec![Value::Null, self.n.into()]),
}
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
let column = &values[0];
let column: &<T as Scalar>::VectorType = if column.is_const() {
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
for (i, v) in column.iter_data().enumerate() {
if let Some(value) = v {
self.update(value, i as u64);
}
}
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`",
}
);
let max = &states[0];
let index = &states[1];
let max: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(max) };
let index: &<u64 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
index
.iter_data()
.flatten()
.zip(max.iter_data().flatten())
.for_each(|(i, max)| self.update(max, i));
Ok(())
}
fn evaluate(&self) -> Result<Value> {
match self.max {
Some(_) => Ok(self.n.into()),
_ => Ok(Value::Null),
}
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct ArgmaxAccumulatorCreator {}
impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Argmax::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"ARGMAX\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint64_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
Ok(vec![
input_types.into_iter().next().unwrap(),
ConcreteDataType::uint64_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut argmax = Argmax::<i32>::default();
argmax.update_batch(&[]).unwrap();
assert_eq!(Value::Null, argmax.evaluate().unwrap());
// test update one not-null value
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
argmax.update_batch(&v).unwrap();
assert_eq!(Value::from(0_u64), argmax.evaluate().unwrap());
// test update one null value
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
argmax.update_batch(&v).unwrap();
assert_eq!(Value::Null, argmax.evaluate().unwrap());
// test update no null-value batch
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-1i32),
Some(1),
Some(3),
]))];
argmax.update_batch(&v).unwrap();
assert_eq!(Value::from(2_u64), argmax.evaluate().unwrap());
// test update null-value batch
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-2i32),
None,
Some(4),
]))];
argmax.update_batch(&v).unwrap();
assert_eq!(Value::from(2_u64), argmax.evaluate().unwrap());
// test update with constant vector
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![4])),
10,
))];
argmax.update_batch(&v).unwrap();
assert_eq!(Value::from(0_u64), argmax.evaluate().unwrap());
}
}


@@ -0,0 +1,216 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::cmp::Ordering;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::vectors::{ConstantVector, Helper};
use datatypes::with_match_primitive_type_id;
use snafu::ensure;
// https://numpy.org/doc/stable/reference/generated/numpy.argmin.html
#[derive(Debug, Default)]
pub struct Argmin<T> {
min: Option<T>,
n: u32,
}
impl<T> Argmin<T>
where
T: Copy + PartialOrd,
{
fn update(&mut self, value: T, index: u32) {
match self.min {
Some(min) => {
if let Some(Ordering::Greater) = min.partial_cmp(&value) {
self.min = Some(value);
self.n = index;
}
}
None => {
self.min = Some(value);
self.n = index;
}
}
}
}
impl<T> Accumulator for Argmin<T>
where
T: WrapperType + PartialOrd,
{
fn state(&self) -> Result<Vec<Value>> {
match self.min {
Some(min) => Ok(vec![min.into(), self.n.into()]),
_ => Ok(vec![Value::Null, self.n.into()]),
}
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 1, InvalidInputStateSnafu);
let column = &values[0];
let column: &<T as Scalar>::VectorType = if column.is_const() {
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
for (i, v) in column.iter_data().enumerate() {
if let Some(value) = v {
self.update(value, i as u32);
}
}
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`",
}
);
let min = &states[0];
let index = &states[1];
let min: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(min) };
let index: &<u32 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
index
.iter_data()
.flatten()
.zip(min.iter_data().flatten())
.for_each(|(i, min)| self.update(min, i));
Ok(())
}
fn evaluate(&self) -> Result<Value> {
match self.min {
Some(_) => Ok(self.n.into()),
_ => Ok(Value::Null),
}
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct ArgminAccumulatorCreator {}
impl AggregateFunctionCreator for ArgminAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Argmin::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"ARGMIN\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint32_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
Ok(vec![
input_types.into_iter().next().unwrap(),
ConcreteDataType::uint32_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut argmin = Argmin::<i32>::default();
argmin.update_batch(&[]).unwrap();
assert_eq!(Value::Null, argmin.evaluate().unwrap());
// test update one not-null value
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
argmin.update_batch(&v).unwrap();
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
// test update one null value
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
argmin.update_batch(&v).unwrap();
assert_eq!(Value::Null, argmin.evaluate().unwrap());
// test update no null-value batch
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-1i32),
Some(1),
Some(3),
]))];
argmin.update_batch(&v).unwrap();
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
// test update null-value batch
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-2i32),
None,
Some(4),
]))];
argmin.update_batch(&v).unwrap();
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
// test update with constant vector
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![4])),
10,
))];
argmin.update_batch(&v).unwrap();
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
}
}
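Note: the accumulator keeps the first index at which the strict minimum appears and skips nulls. A minimal stand-alone sketch of that selection rule over a plain slice (the `argmin_index` helper is hypothetical, not part of this change):

fn argmin_index(values: &[Option<i32>]) -> Option<u32> {
    let mut best: Option<(i32, u32)> = None;
    for (i, v) in values.iter().enumerate() {
        if let Some(v) = *v {
            // Only strictly smaller values win, so ties keep the earliest index,
            // matching `Argmin::update` above.
            if best.map_or(true, |(min, _)| v < min) {
                best = Some((v, i as u32));
            }
        }
    }
    best.map(|(_, i)| i)
}

fn main() {
    assert_eq!(argmin_index(&[Some(-2), None, Some(4)]), Some(0));
    assert_eq!(argmin_index(&[None, None]), None);
}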

View File

@@ -0,0 +1,252 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::marker::PhantomData;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::value::ListValue;
use datatypes::vectors::{ConstantVector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
// https://numpy.org/doc/stable/reference/generated/numpy.diff.html
// I is the input type, O is the output type.
#[derive(Debug, Default)]
pub struct Diff<I, O> {
values: Vec<I>,
_phantom: PhantomData<O>,
}
impl<I, O> Diff<I, O> {
fn push(&mut self, value: I) {
self.values.push(value);
}
}
impl<I, O> Accumulator for Diff<I, O>
where
I: WrapperType,
O: WrapperType,
I::Native: AsPrimitive<O::Native>,
O::Native: std::ops::Sub<Output = O::Native>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
.values
.iter()
.map(|&n| n.into())
.collect::<Vec<Value>>();
Ok(vec![Value::List(ListValue::new(
nums,
I::LogicalType::build_data_type(),
))])
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 1, InvalidInputStateSnafu);
let column = &values[0];
let mut len = 1;
let column: &<I as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
let states = &states[0];
let states = states
.as_any()
.downcast_ref::<ListVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect ListVector, got vector type {}",
states.vector_type_name()
),
})?;
for state in states.values_iter() {
if let Some(state) = state.context(FromScalarValueSnafu)? {
self.update_batch(&[state])?;
}
}
Ok(())
}
fn evaluate(&self) -> Result<Value> {
if self.values.is_empty() || self.values.len() == 1 {
return Ok(Value::Null);
}
let diff = self
.values
.windows(2)
.map(|x| {
let native = x[1].into_native().as_() - x[0].into_native().as_();
O::from_native(native).into()
})
.collect::<Vec<Value>>();
let diff = Value::List(ListValue::new(diff, O::LogicalType::build_data_type()));
Ok(diff)
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct DiffAccumulatorCreator {}
impl AggregateFunctionCreator for DiffAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Diff::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"DIFF\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
with_match_primitive_type_id!(
input_types[0].logical_type_id(),
|$S| {
Ok(ConcreteDataType::list_datatype($S::default().into()))
},
{
unreachable!()
}
)
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
with_match_primitive_type_id!(
input_types[0].logical_type_id(),
|$S| {
Ok(vec![ConcreteDataType::list_datatype($S::default().into())])
},
{
unreachable!()
}
)
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut diff = Diff::<i32, i64>::default();
diff.update_batch(&[]).unwrap();
assert!(diff.values.is_empty());
assert_eq!(Value::Null, diff.evaluate().unwrap());
// test update one not-null value
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
diff.update_batch(&v).unwrap();
assert_eq!(Value::Null, diff.evaluate().unwrap());
// test update one null value
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
diff.update_batch(&v).unwrap();
assert_eq!(Value::Null, diff.evaluate().unwrap());
// test update no null-value batch
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-1i32),
Some(1),
Some(2),
]))];
let values = vec![Value::from(2_i64), Value::from(1_i64)];
diff.update_batch(&v).unwrap();
assert_eq!(
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
diff.evaluate().unwrap()
);
// test update null-value batch
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-2i32),
None,
Some(3),
Some(4),
]))];
let values = vec![Value::from(5_i64), Value::from(1_i64)];
diff.update_batch(&v).unwrap();
assert_eq!(
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
diff.evaluate().unwrap()
);
// test update with constant vector
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![4])),
4,
))];
let values = vec![Value::from(0_i64), Value::from(0_i64), Value::from(0_i64)];
diff.update_batch(&v).unwrap();
assert_eq!(
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
diff.evaluate().unwrap()
);
}
}
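Note: `evaluate` emits pairwise differences of consecutive values, widened to the larger output type, in the spirit of numpy.diff. A small self-contained sketch of the same rule for i32 inputs (the `diff` helper is illustrative only):

// Pairwise differences of consecutive values, widening i32 -> i64,
// mirroring the `windows(2)` loop in `Diff::evaluate` above.
fn diff(values: &[i32]) -> Vec<i64> {
    values
        .windows(2)
        .map(|w| i64::from(w[1]) - i64::from(w[0]))
        .collect()
}

fn main() {
    // Fewer than two values yield no differences (the accumulator returns Null).
    assert!(diff(&[42]).is_empty());
    assert_eq!(diff(&[-1, 1, 2]), vec![2, 1]);
}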

View File

@@ -0,0 +1,238 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::marker::PhantomData;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::WrapperType;
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, UInt64Vector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt};
#[derive(Debug, Default)]
pub struct Mean<T> {
sum: f64,
n: u64,
_phantom: PhantomData<T>,
}
impl<T> Mean<T>
where
T: WrapperType,
T::Native: AsPrimitive<f64>,
{
#[inline(always)]
fn push(&mut self, value: T) {
self.sum += value.into_native().as_();
self.n += 1;
}
#[inline(always)]
fn update(&mut self, sum: f64, n: u64) {
self.sum += sum;
self.n += n;
}
}
impl<T> Accumulator for Mean<T>
where
T: WrapperType,
T::Native: AsPrimitive<f64>,
{
fn state(&self) -> Result<Vec<Value>> {
Ok(vec![self.sum.into(), self.n.into()])
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 1, InvalidInputStateSnafu);
let column = &values[0];
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`",
}
);
let sum = &states[0];
let n = &states[1];
let sum = sum
.as_any()
.downcast_ref::<Float64Vector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect Float64Vector, got vector type {}",
sum.vector_type_name()
),
})?;
let n = n
.as_any()
.downcast_ref::<UInt64Vector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect UInt64Vector, got vector type {}",
sum.vector_type_name()
),
})?;
sum.iter_data().zip(n.iter_data()).for_each(|(sum, n)| {
if let (Some(sum), Some(n)) = (sum, n) {
self.update(sum, n);
}
});
Ok(())
}
fn evaluate(&self) -> Result<Value> {
if self.n == 0 {
return Ok(Value::Null);
}
let values = self.sum / self.n as f64;
Ok(values.into())
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct MeanAccumulatorCreator {}
impl AggregateFunctionCreator for MeanAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Mean::<<$S as LogicalPrimitiveType>::Native>::default()))
},
{
let err_msg = format!(
"\"MEAN\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
Ok(ConcreteDataType::float64_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
Ok(vec![
ConcreteDataType::float64_datatype(),
ConcreteDataType::uint64_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut mean = Mean::<i32>::default();
mean.update_batch(&[]).unwrap();
assert_eq!(Value::Null, mean.evaluate().unwrap());
// test update one not-null value
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
mean.update_batch(&v).unwrap();
assert_eq!(Value::from(42.0_f64), mean.evaluate().unwrap());
// test update one null value
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
mean.update_batch(&v).unwrap();
assert_eq!(Value::Null, mean.evaluate().unwrap());
// test update no null-value batch
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-1i32),
Some(1),
Some(2),
]))];
mean.update_batch(&v).unwrap();
assert_eq!(Value::from(0.6666666666666666), mean.evaluate().unwrap());
// test update null-value batch
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-2i32),
None,
Some(3),
Some(4),
]))];
mean.update_batch(&v).unwrap();
assert_eq!(Value::from(1.6666666666666667), mean.evaluate().unwrap());
// test update with constant vector
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![4])),
10,
))];
mean.update_batch(&v).unwrap();
assert_eq!(Value::from(4.0), mean.evaluate().unwrap());
}
}
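Note: the accumulator's state is just a running (sum, count) pair, and `merge_batch` combines partitions by adding those pairs. A minimal sketch of that merge, with a hypothetical `MeanState` type standing in for the accumulator:

// Partial mean state: running sum and count, merged by simple addition,
// as `Mean::merge_batch` does with its two state vectors.
#[derive(Default)]
struct MeanState {
    sum: f64,
    n: u64,
}

impl MeanState {
    fn push(&mut self, v: f64) {
        self.sum += v;
        self.n += 1;
    }
    fn merge(&mut self, other: &MeanState) {
        self.sum += other.sum;
        self.n += other.n;
    }
    fn finish(&self) -> Option<f64> {
        (self.n > 0).then(|| self.sum / self.n as f64)
    }
}

fn main() {
    let mut a = MeanState::default();
    let mut b = MeanState::default();
    [-1.0, 1.0].iter().for_each(|&v| a.push(v));
    [2.0].iter().for_each(|&v| b.push(v));
    a.merge(&b);
    // Same result as accumulating all three values in one partition.
    assert_eq!(a.finish(), Some(2.0 / 3.0));
}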

View File

@@ -0,0 +1,329 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::marker::PhantomData;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, InvalidInputColSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::{LogicalPrimitiveType, WrapperType};
use datatypes::value::ListValue;
use datatypes::vectors::{ConstantVector, Helper, Int64Vector, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
// https://numpy.org/doc/stable/reference/generated/numpy.polyval.html
#[derive(Debug, Default)]
pub struct Polyval<T, PolyT>
where
T: WrapperType,
T::Native: AsPrimitive<PolyT::Native>,
PolyT: WrapperType,
PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
{
values: Vec<T>,
// DataFusion casts constant in into i64 type.
x: Option<i64>,
_phantom: PhantomData<PolyT>,
}
impl<T, PolyT> Polyval<T, PolyT>
where
T: WrapperType,
T::Native: AsPrimitive<PolyT::Native>,
PolyT: WrapperType,
PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
{
fn push(&mut self, value: T) {
self.values.push(value);
}
}
impl<T, PolyT> Accumulator for Polyval<T, PolyT>
where
T: WrapperType,
T::Native: AsPrimitive<PolyT::Native>,
PolyT: WrapperType + std::iter::Sum<<PolyT as WrapperType>::Native>,
PolyT::Native: std::ops::Mul<Output = PolyT::Native> + std::iter::Sum<PolyT::Native>,
i64: AsPrimitive<<PolyT as WrapperType>::Native>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
.values
.iter()
.map(|&n| n.into())
.collect::<Vec<Value>>();
Ok(vec![
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
self.x.into(),
])
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 2, InvalidInputStateSnafu);
ensure!(values[0].len() == values[1].len(), InvalidInputStateSnafu);
if values[0].len() == 0 {
return Ok(());
}
// The first column holds the polynomial coefficients; the second holds the evaluation point.
let column = &values[0];
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
let x = &values[1];
let x = Helper::check_get_scalar::<i64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"POLYVAL\" function's second argument to be a positive integer",
})?;
// `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
let first = x.get(0);
ensure!(!first.is_null(), InvalidInputColSnafu);
for i in 1..x.len() {
ensure!(first == x.get(i), InvalidInputColSnafu);
}
let first = match first {
Value::Int64(v) => v,
// unreachable because we have checked `first` is not null and is i64 above
_ => unreachable!(),
};
if let Some(x) = self.x {
ensure!(x == first, InvalidInputColSnafu);
} else {
self.x = Some(first);
};
Ok(())
}
// DataFusion executes accumulators in partitions. In some execution stage, DataFusion will
// merge states from other accumulators (returned by `state()` method).
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`",
}
);
let x = &states[1];
let x = x
.as_any()
.downcast_ref::<Int64Vector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect Int64Vector, got vector type {}",
x.vector_type_name()
),
})?;
let x = x.get(0);
if x.is_null() {
return Ok(());
}
let x = match x {
Value::Int64(x) => x,
_ => unreachable!(),
};
self.x = Some(x);
let values = &states[0];
let values = values
.as_any()
.downcast_ref::<ListVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect ListVector, got vector type {}",
values.vector_type_name()
),
})?;
for value in values.values_iter() {
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
}
// DataFusion expects this function to return the final value of this aggregator.
fn evaluate(&self) -> Result<Value> {
if self.values.is_empty() {
return Ok(Value::Null);
}
let x = if let Some(x) = self.x {
x
} else {
return Ok(Value::Null);
};
let len = self.values.len();
let polyval: PolyT = self
.values
.iter()
.enumerate()
.map(|(i, &value)| value.into_native().as_() * x.pow((len - 1 - i) as u32).as_())
.sum();
Ok(polyval.into())
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct PolyvalAccumulatorCreator {}
impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Polyval::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"POLYVAL\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
let input_type = self.input_types()?[0].logical_type_id();
with_match_primitive_type_id!(
input_type,
|$S| {
Ok(<<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::build_data_type())
},
{
unreachable!()
}
)
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
Ok(vec![
ConcreteDataType::list_datatype(input_types.into_iter().next().unwrap()),
ConcreteDataType::int64_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut polyval = Polyval::<i32, i64>::default();
polyval.update_batch(&[]).unwrap();
assert!(polyval.values.is_empty());
assert_eq!(Value::Null, polyval.evaluate().unwrap());
// test update one not-null value
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(3)])),
Arc::new(Int64Vector::from(vec![Some(2_i64)])),
];
polyval.update_batch(&v).unwrap();
assert_eq!(Value::Int64(3), polyval.evaluate().unwrap());
// test update one null value
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
Arc::new(Int64Vector::from(vec![Some(2_i64)])),
];
polyval.update_batch(&v).unwrap();
assert_eq!(Value::Null, polyval.evaluate().unwrap());
// test update no null-value batch
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(3), Some(0), Some(1)])),
Arc::new(Int64Vector::from(vec![
Some(2_i64),
Some(2_i64),
Some(2_i64),
])),
];
polyval.update_batch(&v).unwrap();
assert_eq!(Value::Int64(13), polyval.evaluate().unwrap());
// test update null-value batch
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(3), Some(0), None, Some(1)])),
Arc::new(Int64Vector::from(vec![
Some(2_i64),
Some(2_i64),
Some(2_i64),
Some(2_i64),
])),
];
polyval.update_batch(&v).unwrap();
assert_eq!(Value::Int64(13), polyval.evaluate().unwrap());
// test update with constant vector
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![4])),
2,
)),
Arc::new(Int64Vector::from(vec![Some(5_i64), Some(5_i64)])),
];
polyval.update_batch(&v).unwrap();
assert_eq!(Value::Int64(24), polyval.evaluate().unwrap());
}
}
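Note: the coefficients are taken in decreasing power order, so `evaluate` computes sum(c_i * x^(len - 1 - i)); for the batch [3, 0, 1] with x = 2 that is 3 * 4 + 0 * 2 + 1 = 13, as asserted above. An illustrative stand-alone version (the `polyval` helper is not part of this change):

// Polynomial evaluation with coefficients in decreasing power order,
// the same sum `Polyval::evaluate` computes above.
fn polyval(coeffs: &[i64], x: i64) -> i64 {
    let len = coeffs.len();
    coeffs
        .iter()
        .enumerate()
        .map(|(i, &c)| c * x.pow((len - 1 - i) as u32))
        .sum()
}

fn main() {
    // 3 * 2^2 + 0 * 2 + 1 = 13, matching the test above.
    assert_eq!(polyval(&[3, 0, 1], 2), 13);
}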

View File

@@ -0,0 +1,270 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
use statrs::distribution::{ContinuousCDF, Normal};
use statrs::statistics::Statistics;
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
#[derive(Debug, Default)]
pub struct ScipyStatsNormCdf<T> {
values: Vec<T>,
x: Option<f64>,
}
impl<T> ScipyStatsNormCdf<T> {
fn push(&mut self, value: T) {
self.values.push(value);
}
}
impl<T> Accumulator for ScipyStatsNormCdf<T>
where
T: WrapperType + std::iter::Sum<T>,
T::Native: AsPrimitive<f64>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
.values
.iter()
.map(|&x| x.into())
.collect::<Vec<Value>>();
Ok(vec![
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
self.x.into(),
])
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 2, InvalidInputStateSnafu);
ensure!(values[1].len() == values[0].len(), InvalidInputStateSnafu);
if values[0].len() == 0 {
return Ok(());
}
let column = &values[0];
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
let x = &values[1];
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"SCIPYSTATSNORMCDF\" function's second argument to be a positive integer",
})?;
let first = x.get(0);
ensure!(!first.is_null(), InvalidInputColSnafu);
let first = match first {
Value::Float64(OrderedFloat(v)) => v,
// unreachable because we have checked `first` is not null and is f64 above
_ => unreachable!(),
};
if let Some(x) = self.x {
ensure!(x == first, InvalidInputColSnafu);
} else {
self.x = Some(first);
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`",
}
);
let x = &states[1];
let x = x
.as_any()
.downcast_ref::<Float64Vector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect Float64Vector, got vector type {}",
x.vector_type_name()
),
})?;
let x = x.get(0);
if x.is_null() {
return Ok(());
}
let x = match x {
Value::Float64(OrderedFloat(x)) => x,
_ => unreachable!(),
};
self.x = Some(x);
let values = &states[0];
let values = values
.as_any()
.downcast_ref::<ListVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect ListVector, got vector type {}",
values.vector_type_name()
),
})?;
for value in values.values_iter() {
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
}
fn evaluate(&self) -> Result<Value> {
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
if mean.is_nan() || std_dev.is_nan() {
Ok(Value::Null)
} else {
let x = if let Some(x) = self.x {
x
} else {
return Ok(Value::Null);
};
let n = Normal::new(mean, std_dev).context(GenerateFunctionSnafu)?;
Ok(n.cdf(x).into())
}
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct ScipyStatsNormCdfAccumulatorCreator {}
impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(ScipyStatsNormCdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"SCIPYSTATSNORMCDF\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
Ok(ConcreteDataType::float64_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
Ok(vec![
ConcreteDataType::list_datatype(input_types[0].clone()),
ConcreteDataType::float64_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::{Float64Vector, Int32Vector};
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
scipy_stats_norm_cdf.update_batch(&[]).unwrap();
assert!(scipy_stats_norm_cdf.values.is_empty());
assert_eq!(Value::Null, scipy_stats_norm_cdf.evaluate().unwrap());
// test update no null-value batch
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
Some(2.0_f64),
Some(2.0_f64),
])),
];
scipy_stats_norm_cdf.update_batch(&v).unwrap();
assert_eq!(
Value::from(0.8086334555398362),
scipy_stats_norm_cdf.evaluate().unwrap()
);
// test update null-value batch
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
None,
Some(2.0_f64),
Some(2.0_f64),
])),
];
scipy_stats_norm_cdf.update_batch(&v).unwrap();
assert_eq!(
Value::from(0.5412943699039795),
scipy_stats_norm_cdf.evaluate().unwrap()
);
}
}
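Note: `evaluate` fits a normal distribution to the accumulated values via their mean and sample standard deviation, then returns its CDF at the constant x. A rough stand-alone sketch of the same computation, assuming the statrs crate used above (the `norm_cdf` helper is illustrative only):

use statrs::distribution::{ContinuousCDF, Normal};

// Fit a normal distribution to the samples via their mean and sample (n - 1)
// standard deviation, then evaluate the CDF at `x`, as `evaluate` does above.
fn norm_cdf(samples: &[f64], x: f64) -> Option<f64> {
    let n = samples.len() as f64;
    if n < 2.0 {
        return None;
    }
    let mean = samples.iter().sum::<f64>() / n;
    let var = samples.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / (n - 1.0);
    let normal = Normal::new(mean, var.sqrt()).ok()?;
    Some(normal.cdf(x))
}

fn main() {
    // Roughly 0.8086 for the batch [-1, 1, 2] at x = 2.0, as in the test above.
    let p = norm_cdf(&[-1.0, 1.0, 2.0], 2.0).unwrap();
    assert!((p - 0.8086334555398362).abs() < 1e-9);
}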

View File

@@ -0,0 +1,271 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
use statrs::distribution::{Continuous, Normal};
use statrs::statistics::Statistics;
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
#[derive(Debug, Default)]
pub struct ScipyStatsNormPdf<T> {
values: Vec<T>,
x: Option<f64>,
}
impl<T> ScipyStatsNormPdf<T> {
fn push(&mut self, value: T) {
self.values.push(value);
}
}
impl<T> Accumulator for ScipyStatsNormPdf<T>
where
T: WrapperType,
T::Native: AsPrimitive<f64> + std::iter::Sum<T>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
.values
.iter()
.map(|&x| x.into())
.collect::<Vec<Value>>();
Ok(vec![
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
self.x.into(),
])
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 2, InvalidInputStateSnafu);
ensure!(values[1].len() == values[0].len(), InvalidInputStateSnafu);
if values[0].len() == 0 {
return Ok(());
}
let column = &values[0];
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
let x = &values[1];
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"SCIPYSTATSNORMPDF\" function's second argument to be a positive integer",
})?;
let first = x.get(0);
ensure!(!first.is_null(), InvalidInputColSnafu);
let first = match first {
Value::Float64(OrderedFloat(v)) => v,
// unreachable because we have checked `first` is not null and is f64 above
_ => unreachable!(),
};
if let Some(x) = self.x {
ensure!(x == first, InvalidInputColSnafu);
} else {
self.x = Some(first);
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`",
}
);
let x = &states[1];
let x = x
.as_any()
.downcast_ref::<Float64Vector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect Float64Vector, got vector type {}",
x.vector_type_name()
),
})?;
let x = x.get(0);
if x.is_null() {
return Ok(());
}
let x = match x {
Value::Float64(OrderedFloat(x)) => x,
_ => unreachable!(),
};
self.x = Some(x);
let values = &states[0];
let values = values
.as_any()
.downcast_ref::<ListVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect ListVector, got vector type {}",
values.vector_type_name()
),
})?;
for value in values.values_iter() {
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
}
fn evaluate(&self) -> Result<Value> {
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
if mean.is_nan() || std_dev.is_nan() {
Ok(Value::Null)
} else {
let x = if let Some(x) = self.x {
x
} else {
return Ok(Value::Null);
};
let n = Normal::new(mean, std_dev).context(GenerateFunctionSnafu)?;
Ok(n.pdf(x).into())
}
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct ScipyStatsNormPdfAccumulatorCreator {}
impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(ScipyStatsNormPdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"SCIPYSTATSNORMpdf\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
Ok(ConcreteDataType::float64_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
Ok(vec![
ConcreteDataType::list_datatype(input_types[0].clone()),
ConcreteDataType::float64_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::{Float64Vector, Int32Vector};
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
scipy_stats_norm_pdf.update_batch(&[]).unwrap();
assert!(scipy_stats_norm_pdf.values.is_empty());
assert_eq!(Value::Null, scipy_stats_norm_pdf.evaluate().unwrap());
// test update no null-value batch
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
Some(2.0_f64),
Some(2.0_f64),
])),
];
scipy_stats_norm_pdf.update_batch(&v).unwrap();
assert_eq!(
Value::from(0.17843340219081558),
scipy_stats_norm_pdf.evaluate().unwrap()
);
// test update null-value batch
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
None,
Some(2.0_f64),
Some(2.0_f64),
])),
];
scipy_stats_norm_pdf.update_batch(&v).unwrap();
assert_eq!(
Value::from(0.12343972049858312),
scipy_stats_norm_pdf.evaluate().unwrap()
);
}
}
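Note: the PDF variant differs from the CDF one only in returning the density instead of the cumulative probability. As a sanity check, the closed-form Gaussian density reproduces the expected test value (illustrative sketch, not part of this change):

use std::f64::consts::PI;

// Closed-form Gaussian density N(mean, std_dev^2) at x, the quantity
// `ScipyStatsNormPdf::evaluate` returns via statrs above.
fn norm_pdf(mean: f64, std_dev: f64, x: f64) -> f64 {
    let z = (x - mean) / std_dev;
    (-0.5 * z * z).exp() / (std_dev * (2.0 * PI).sqrt())
}

fn main() {
    // For [-1, 1, 2]: mean = 2/3, sample variance = 7/3; pdf(2.0) ~= 0.1784,
    // matching the test value above.
    let p = norm_pdf(2.0 / 3.0, (7.0 / 3.0f64).sqrt(), 2.0);
    assert!((p - 0.17843340219081558).abs() < 1e-9);
}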

View File

@@ -445,20 +445,10 @@ impl Pool {
 async fn recycle_channel_in_loop(pool: Arc<Pool>, interval_secs: u64) {
     let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
-    // use weak ref here to prevent pool being leaked
-    let pool_weak = {
-        let weak = Arc::downgrade(&pool);
-        drop(pool);
-        weak
-    };
     loop {
         let _ = interval.tick().await;
-        if let Some(pool) = pool_weak.upgrade() {
-            pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
-        } else {
-            // no one is using this pool, so we can also let go
-            break;
-        }
+        pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
     }
 }

View File

@@ -28,6 +28,7 @@ use crate::error::{
     InvalidRoleSnafu, ParseNumSnafu, Result,
 };
 use crate::peer::Peer;
+use crate::ClusterId;
 
 const CLUSTER_NODE_INFO_PREFIX: &str = "__meta_cluster_node_info";
@@ -55,9 +56,12 @@ pub trait ClusterInfo {
     // TODO(jeremy): Other info, like region status, etc.
 }
 
-/// The key of [NodeInfo] in the storage. The format is `__meta_cluster_node_info-0-{role}-{node_id}`.
+/// The key of [NodeInfo] in the storage. The format is `__meta_cluster_node_info-{cluster_id}-{role}-{node_id}`.
 #[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize)]
 pub struct NodeInfoKey {
+    /// The cluster id.
+    // todo(hl): remove cluster_id as it is not assigned anywhere.
+    pub cluster_id: ClusterId,
     /// The role of the node. It can be `[Role::Datanode]` or `[Role::Frontend]`.
     pub role: Role,
     /// The node id.
@@ -80,15 +84,24 @@ impl NodeInfoKey {
             _ => peer.id,
         };
 
-        Some(NodeInfoKey { role, node_id })
+        Some(NodeInfoKey {
+            cluster_id: header.cluster_id,
+            role,
+            node_id,
+        })
     }
 
-    pub fn key_prefix() -> String {
-        format!("{}-0-", CLUSTER_NODE_INFO_PREFIX)
+    pub fn key_prefix_with_cluster_id(cluster_id: u64) -> String {
+        format!("{}-{}-", CLUSTER_NODE_INFO_PREFIX, cluster_id)
     }
 
-    pub fn key_prefix_with_role(role: Role) -> String {
-        format!("{}-0-{}-", CLUSTER_NODE_INFO_PREFIX, i32::from(role))
+    pub fn key_prefix_with_role(cluster_id: ClusterId, role: Role) -> String {
+        format!(
+            "{}-{}-{}-",
+            CLUSTER_NODE_INFO_PREFIX,
+            cluster_id,
+            i32::from(role)
+        )
     }
 }
@@ -180,10 +193,15 @@ impl FromStr for NodeInfoKey {
         let caps = CLUSTER_NODE_INFO_PREFIX_PATTERN
             .captures(key)
             .context(InvalidNodeInfoKeySnafu { key })?;
 
         ensure!(caps.len() == 4, InvalidNodeInfoKeySnafu { key });
 
+        let cluster_id = caps[1].to_string();
         let role = caps[2].to_string();
         let node_id = caps[3].to_string();
+        let cluster_id: u64 = cluster_id.parse().context(ParseNumSnafu {
+            err_msg: format!("invalid cluster_id: {cluster_id}"),
+        })?;
         let role: i32 = role.parse().context(ParseNumSnafu {
             err_msg: format!("invalid role {role}"),
         })?;
@@ -192,7 +210,11 @@ impl FromStr for NodeInfoKey {
             err_msg: format!("invalid node_id: {node_id}"),
         })?;
 
-        Ok(Self { role, node_id })
+        Ok(Self {
+            cluster_id,
+            role,
+            node_id,
+        })
     }
 }
@@ -211,8 +233,9 @@ impl TryFrom<Vec<u8>> for NodeInfoKey {
 impl From<&NodeInfoKey> for Vec<u8> {
     fn from(key: &NodeInfoKey) -> Self {
         format!(
-            "{}-0-{}-{}",
+            "{}-{}-{}-{}",
             CLUSTER_NODE_INFO_PREFIX,
+            key.cluster_id,
             i32::from(key.role),
             key.node_id
         )
@@ -285,6 +308,7 @@ mod tests {
     #[test]
     fn test_node_info_key_round_trip() {
         let key = NodeInfoKey {
+            cluster_id: 1,
             role: Datanode,
             node_id: 2,
         };
@@ -292,6 +316,7 @@ mod tests {
         let key_bytes: Vec<u8> = (&key).into();
         let new_key: NodeInfoKey = key_bytes.try_into().unwrap();
 
+        assert_eq!(1, new_key.cluster_id);
         assert_eq!(Datanode, new_key.role);
         assert_eq!(2, new_key.node_id);
     }
@@ -337,11 +362,11 @@ mod tests {
     #[test]
     fn test_node_info_key_prefix() {
-        let prefix = NodeInfoKey::key_prefix();
-        assert_eq!(prefix, "__meta_cluster_node_info-0-");
+        let prefix = NodeInfoKey::key_prefix_with_cluster_id(1);
+        assert_eq!(prefix, "__meta_cluster_node_info-1-");
 
-        let prefix = NodeInfoKey::key_prefix_with_role(Frontend);
-        assert_eq!(prefix, "__meta_cluster_node_info-0-1-");
+        let prefix = NodeInfoKey::key_prefix_with_role(2, Frontend);
+        assert_eq!(prefix, "__meta_cluster_node_info-2-1-");
     }
 
     #[test]
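Note: with this change the node-info key is serialized as `__meta_cluster_node_info-{cluster_id}-{role}-{node_id}`. A tiny illustrative encoder (hypothetical helper; the role value 1 for Frontend is taken from the prefix test above):

// Illustrative only: the storage key layout used by NodeInfoKey above.
fn node_info_key(cluster_id: u64, role: i32, node_id: u64) -> String {
    format!("__meta_cluster_node_info-{cluster_id}-{role}-{node_id}")
}

fn main() {
    // Frontend serializes as role 1 per the prefix test; the node id is arbitrary.
    assert_eq!(node_info_key(2, 1, 7), "__meta_cluster_node_info-2-1-7");
}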

View File

@@ -25,8 +25,8 @@ use store_api::region_engine::{RegionRole, RegionStatistic};
 use store_api::storage::RegionId;
 use table::metadata::TableId;
 
-use crate::error;
 use crate::error::Result;
+use crate::{error, ClusterId};
 
 pub(crate) const DATANODE_LEASE_PREFIX: &str = "__meta_datanode_lease";
 const INACTIVE_REGION_PREFIX: &str = "__meta_inactive_region";
@@ -48,10 +48,11 @@ lazy_static! {
 /// The key of the datanode stat in the storage.
 ///
-/// The format is `__meta_datanode_stat-0-{node_id}`.
+/// The format is `__meta_datanode_stat-{cluster_id}-{node_id}`.
 #[derive(Debug, Clone, Default, Serialize, Deserialize)]
 pub struct Stat {
     pub timestamp_millis: i64,
+    pub cluster_id: ClusterId,
     // The datanode Id.
     pub id: u64,
     // The datanode address.
@@ -101,7 +102,10 @@ impl Stat {
     }
 
     pub fn stat_key(&self) -> DatanodeStatKey {
-        DatanodeStatKey { node_id: self.id }
+        DatanodeStatKey {
+            cluster_id: self.cluster_id,
+            node_id: self.id,
+        }
     }
 
     /// Returns a tuple array containing [RegionId] and [RegionRole].
@@ -141,7 +145,7 @@ impl TryFrom<&HeartbeatRequest> for Stat {
         } = value;
 
         match (header, peer) {
-            (Some(_header), Some(peer)) => {
+            (Some(header), Some(peer)) => {
                 let region_stats = region_stats
                     .iter()
                     .map(RegionStat::from)
@@ -149,6 +153,7 @@ impl TryFrom<&HeartbeatRequest> for Stat {
                 Ok(Self {
                     timestamp_millis: time_util::current_time_millis(),
+                    cluster_id: header.cluster_id,
                     // datanode id
                     id: peer.id,
                     // datanode address
@@ -191,24 +196,32 @@ impl From<&api::v1::meta::RegionStat> for RegionStat {
 /// The key of the datanode stat in the memory store.
 ///
-/// The format is `__meta_datanode_stat-0-{node_id}`.
+/// The format is `__meta_datanode_stat-{cluster_id}-{node_id}`.
 #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
 pub struct DatanodeStatKey {
+    pub cluster_id: ClusterId,
     pub node_id: u64,
 }
 
 impl DatanodeStatKey {
     /// The key prefix.
     pub fn prefix_key() -> Vec<u8> {
-        // todo(hl): remove cluster id in prefix
-        format!("{DATANODE_STAT_PREFIX}-0-").into_bytes()
+        format!("{DATANODE_STAT_PREFIX}-").into_bytes()
+    }
+
+    /// The key prefix with the cluster id.
+    pub fn key_prefix_with_cluster_id(cluster_id: ClusterId) -> String {
+        format!("{DATANODE_STAT_PREFIX}-{cluster_id}-")
     }
 }
 
 impl From<DatanodeStatKey> for Vec<u8> {
     fn from(value: DatanodeStatKey) -> Self {
-        // todo(hl): remove cluster id in prefix
-        format!("{}-0-{}", DATANODE_STAT_PREFIX, value.node_id).into_bytes()
+        format!(
+            "{}-{}-{}",
+            DATANODE_STAT_PREFIX, value.cluster_id, value.node_id
+        )
+        .into_bytes()
     }
 }
@@ -221,12 +234,20 @@ impl FromStr for DatanodeStatKey {
             .context(error::InvalidStatKeySnafu { key })?;
 
         ensure!(caps.len() == 3, error::InvalidStatKeySnafu { key });
 
+        let cluster_id = caps[1].to_string();
         let node_id = caps[2].to_string();
+        let cluster_id: u64 = cluster_id.parse().context(error::ParseNumSnafu {
+            err_msg: format!("invalid cluster_id: {cluster_id}"),
+        })?;
         let node_id: u64 = node_id.parse().context(error::ParseNumSnafu {
             err_msg: format!("invalid node_id: {node_id}"),
         })?;
 
-        Ok(Self { node_id })
+        Ok(Self {
+            cluster_id,
+            node_id,
+        })
     }
 }
@@ -300,6 +321,7 @@ mod tests {
     #[test]
     fn test_stat_key() {
         let stat = Stat {
+            cluster_id: 3,
             id: 101,
             region_num: 10,
             ..Default::default()
@@ -307,12 +329,14 @@ mod tests {
         let stat_key = stat.stat_key();
 
+        assert_eq!(3, stat_key.cluster_id);
         assert_eq!(101, stat_key.node_id);
     }
 
     #[test]
     fn test_stat_val_round_trip() {
         let stat = Stat {
+            cluster_id: 0,
             id: 101,
             region_num: 100,
             ..Default::default()
@@ -327,6 +351,7 @@ mod tests {
         assert_eq!(1, stats.len());
 
         let stat = stats.first().unwrap();
+        assert_eq!(0, stat.cluster_id);
         assert_eq!(101, stat.id);
         assert_eq!(100, stat.region_num);
     }

View File

@@ -30,7 +30,7 @@ use crate::node_manager::NodeManagerRef;
 use crate::region_keeper::MemoryRegionKeeperRef;
 use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
 use crate::rpc::procedure::{MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse};
-use crate::DatanodeId;
+use crate::{ClusterId, DatanodeId};
 
 pub mod alter_database;
 pub mod alter_logical_tables;
@@ -57,6 +57,7 @@ pub mod utils;
 #[derive(Debug, Default)]
 pub struct ExecutorContext {
+    pub cluster_id: Option<u64>,
     pub tracing_context: Option<W3cTrace>,
 }
@@ -89,6 +90,10 @@ pub trait ProcedureExecutor: Send + Sync {
 pub type ProcedureExecutorRef = Arc<dyn ProcedureExecutor>;
 
+pub struct TableMetadataAllocatorContext {
+    pub cluster_id: ClusterId,
+}
+
 /// Metadata allocated to a table.
 #[derive(Default)]
 pub struct TableMetadata {
@@ -103,7 +108,7 @@ pub struct TableMetadata {
 pub type RegionFailureDetectorControllerRef = Arc<dyn RegionFailureDetectorController>;
 
-pub type DetectingRegion = (DatanodeId, RegionId);
+pub type DetectingRegion = (ClusterId, DatanodeId, RegionId);
 
 /// Used for actively registering Region failure detectors.
 ///

View File

@@ -30,6 +30,7 @@ use crate::key::DeserializedValueWithBytes;
 use crate::lock_key::{CatalogLock, SchemaLock};
 use crate::rpc::ddl::UnsetDatabaseOption::{self};
 use crate::rpc::ddl::{AlterDatabaseKind, AlterDatabaseTask, SetDatabaseOption};
+use crate::ClusterId;
 
 pub struct AlterDatabaseProcedure {
     pub context: DdlContext,
@@ -64,10 +65,14 @@ fn build_new_schema_value(
 impl AlterDatabaseProcedure {
     pub const TYPE_NAME: &'static str = "metasrv-procedure::AlterDatabase";
 
-    pub fn new(task: AlterDatabaseTask, context: DdlContext) -> Result<Self> {
+    pub fn new(
+        cluster_id: ClusterId,
+        task: AlterDatabaseTask,
+        context: DdlContext,
+    ) -> Result<Self> {
         Ok(Self {
             context,
-            data: AlterDatabaseData::new(task)?,
+            data: AlterDatabaseData::new(task, cluster_id)?,
         })
     }
@@ -178,6 +183,7 @@ enum AlterDatabaseState {
 /// The data of alter database procedure.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct AlterDatabaseData {
+    cluster_id: ClusterId,
     state: AlterDatabaseState,
     kind: AlterDatabaseKind,
     catalog_name: String,
@@ -186,8 +192,9 @@ pub struct AlterDatabaseData {
 }
 
 impl AlterDatabaseData {
-    pub fn new(task: AlterDatabaseTask) -> Result<Self> {
+    pub fn new(task: AlterDatabaseTask, cluster_id: ClusterId) -> Result<Self> {
         Ok(Self {
+            cluster_id,
             state: AlterDatabaseState::Prepare,
             kind: AlterDatabaseKind::try_from(task.alter_expr.kind.unwrap())?,
             catalog_name: task.alter_expr.catalog_name,

View File

@@ -37,9 +37,9 @@ use crate::key::table_info::TableInfoValue;
 use crate::key::table_route::PhysicalTableRouteValue;
 use crate::key::DeserializedValueWithBytes;
 use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
-use crate::metrics;
 use crate::rpc::ddl::AlterTableTask;
 use crate::rpc::router::find_leaders;
+use crate::{metrics, ClusterId};
 
 pub struct AlterLogicalTablesProcedure {
     pub context: DdlContext,
@@ -50,6 +50,7 @@ impl AlterLogicalTablesProcedure {
     pub const TYPE_NAME: &'static str = "metasrv-procedure::AlterLogicalTables";
 
     pub fn new(
+        cluster_id: ClusterId,
         tasks: Vec<AlterTableTask>,
         physical_table_id: TableId,
         context: DdlContext,
@@ -57,6 +58,7 @@ impl AlterLogicalTablesProcedure {
         Self {
             context,
             data: AlterTablesData {
+                cluster_id,
                 state: AlterTablesState::Prepare,
                 tasks,
                 table_info_values: vec![],
@@ -238,6 +240,7 @@ impl Procedure for AlterLogicalTablesProcedure {
 #[derive(Debug, Serialize, Deserialize)]
 pub struct AlterTablesData {
+    cluster_id: ClusterId,
     state: AlterTablesState,
     tasks: Vec<AlterTableTask>,
     /// Table info values before the alter operation.

View File

@@ -45,9 +45,9 @@ use crate::instruction::CacheIdent;
 use crate::key::table_info::TableInfoValue;
 use crate::key::{DeserializedValueWithBytes, RegionDistribution};
 use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
-use crate::metrics;
 use crate::rpc::ddl::AlterTableTask;
 use crate::rpc::router::{find_leader_regions, find_leaders, region_distribution};
+use crate::{metrics, ClusterId};
 
 /// The alter table procedure
 pub struct AlterTableProcedure {
@@ -64,11 +64,16 @@ pub struct AlterTableProcedure {
 impl AlterTableProcedure {
     pub const TYPE_NAME: &'static str = "metasrv-procedure::AlterTable";
 
-    pub fn new(table_id: TableId, task: AlterTableTask, context: DdlContext) -> Result<Self> {
+    pub fn new(
+        cluster_id: ClusterId,
+        table_id: TableId,
+        task: AlterTableTask,
+        context: DdlContext,
+    ) -> Result<Self> {
         task.validate()?;
 
         Ok(Self {
             context,
-            data: AlterTableData::new(task, table_id),
+            data: AlterTableData::new(task, table_id, cluster_id),
             new_table_info: None,
         })
     }
@@ -302,6 +307,7 @@ enum AlterTableState {
 // The serialized data of alter table.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct AlterTableData {
+    cluster_id: ClusterId,
     state: AlterTableState,
     task: AlterTableTask,
     table_id: TableId,
@@ -312,11 +318,12 @@ pub struct AlterTableData {
 }
 
 impl AlterTableData {
-    pub fn new(task: AlterTableTask, table_id: TableId) -> Self {
+    pub fn new(task: AlterTableTask, table_id: TableId, cluster_id: u64) -> Self {
         Self {
             state: AlterTableState::Prepare,
             task,
             table_id,
+            cluster_id,
             table_info_value: None,
             region_distribution: None,
         }

View File

@@ -167,9 +167,10 @@ mod tests {
    use crate::test_util::{new_ddl_context, MockDatanodeManager};
    /// Prepares a region with schema `[ts: Timestamp, host: Tag, cpu: Field]`.
-    async fn prepare_ddl_context() -> (DdlContext, TableId, RegionId, String) {
+    async fn prepare_ddl_context() -> (DdlContext, u64, TableId, RegionId, String) {
        let datanode_manager = Arc::new(MockDatanodeManager::new(()));
        let ddl_context = new_ddl_context(datanode_manager);
+        let cluster_id = 1;
        let table_id = 1024;
        let region_id = RegionId::new(table_id, 1);
        let table_name = "foo";
@@ -224,12 +225,19 @@ mod tests {
        )
        .await
        .unwrap();
-        (ddl_context, table_id, region_id, table_name.to_string())
+        (
+            ddl_context,
+            cluster_id,
+            table_id,
+            region_id,
+            table_name.to_string(),
+        )
    }
    #[tokio::test]
    async fn test_make_alter_region_request() {
-        let (ddl_context, table_id, region_id, table_name) = prepare_ddl_context().await;
+        let (ddl_context, cluster_id, table_id, region_id, table_name) =
+            prepare_ddl_context().await;
        let task = AlterTableTask {
            alter_table: AlterTableExpr {
@@ -257,7 +265,8 @@ mod tests {
            },
        };
-        let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context).unwrap();
+        let mut procedure =
+            AlterTableProcedure::new(cluster_id, table_id, task, ddl_context).unwrap();
        procedure.on_prepare().await.unwrap();
        let alter_kind = procedure.make_region_alter_kind().unwrap();
        let Some(Body::Alter(alter_region_request)) = procedure
@@ -298,7 +307,8 @@ mod tests {
    #[tokio::test]
    async fn test_make_alter_column_type_region_request() {
-        let (ddl_context, table_id, region_id, table_name) = prepare_ddl_context().await;
+        let (ddl_context, cluster_id, table_id, region_id, table_name) =
+            prepare_ddl_context().await;
        let task = AlterTableTask {
            alter_table: AlterTableExpr {
@@ -315,7 +325,8 @@ mod tests {
            },
        };
-        let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context).unwrap();
+        let mut procedure =
+            AlterTableProcedure::new(cluster_id, table_id, task, ddl_context).unwrap();
        procedure.on_prepare().await.unwrap();
        let alter_kind = procedure.make_region_alter_kind().unwrap();
        let Some(Body::Alter(alter_region_request)) = procedure

View File

@@ -46,9 +46,9 @@ use crate::key::flow::flow_route::FlowRouteValue;
use crate::key::table_name::TableNameKey;
use crate::key::{DeserializedValueWithBytes, FlowId, FlowPartitionId};
use crate::lock_key::{CatalogLock, FlowNameLock, TableNameLock};
-use crate::metrics;
use crate::peer::Peer;
use crate::rpc::ddl::{CreateFlowTask, QueryContext};
+use crate::{metrics, ClusterId};
/// The procedure of flow creation.
pub struct CreateFlowProcedure {
@@ -60,10 +60,16 @@ impl CreateFlowProcedure {
    pub const TYPE_NAME: &'static str = "metasrv-procedure::CreateFlow";
    /// Returns a new [CreateFlowProcedure].
-    pub fn new(task: CreateFlowTask, query_context: QueryContext, context: DdlContext) -> Self {
+    pub fn new(
+        cluster_id: ClusterId,
+        task: CreateFlowTask,
+        query_context: QueryContext,
+        context: DdlContext,
+    ) -> Self {
        Self {
            context,
            data: CreateFlowData {
+                cluster_id,
                task,
                flow_id: None,
                peers: vec![],
@@ -337,7 +343,6 @@ pub enum FlowType {
impl FlowType {
    pub const RECORDING_RULE: &str = "recording_rule";
    pub const STREAMING: &str = "streaming";
-    pub const FLOW_TYPE_KEY: &str = "flow_type";
}
impl Default for FlowType {
@@ -358,6 +363,7 @@ impl fmt::Display for FlowType {
/// The serializable data.
#[derive(Debug, Serialize, Deserialize)]
pub struct CreateFlowData {
+    pub(crate) cluster_id: ClusterId,
    pub(crate) state: CreateFlowState,
    pub(crate) task: CreateFlowTask,
    pub(crate) flow_id: Option<FlowId>,
@@ -392,8 +398,7 @@ impl From<&CreateFlowData> for CreateRequest {
        };
        let flow_type = value.flow_type.unwrap_or_default().to_string();
-        req.flow_options
-            .insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
+        req.flow_options.insert("flow_type".to_string(), flow_type);
        req
    }
}
@@ -425,7 +430,7 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
            .collect::<Vec<_>>();
        let flow_type = value.flow_type.unwrap_or_default().to_string();
-        options.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
+        options.insert("flow_type".to_string(), flow_type);
        let flow_info = FlowInfoValue {
            source_table_ids: value.source_table_ids.clone(),

View File

@@ -23,10 +23,11 @@ impl CreateFlowProcedure {
    pub(crate) async fn allocate_flow_id(&mut self) -> Result<()> {
        //TODO(weny, ruihang): We doesn't support the partitions. It's always be 1, now.
        let partitions = 1;
+        let cluster_id = self.data.cluster_id;
        let (flow_id, peers) = self
            .context
            .flow_metadata_allocator
-            .create(partitions)
+            .create(cluster_id, partitions)
            .await?;
        self.data.flow_id = Some(flow_id);
        self.data.peers = peers;

View File

@@ -36,9 +36,9 @@ use crate::ddl::DdlContext;
use crate::error::{DecodeJsonSnafu, MetadataCorruptionSnafu, Result};
use crate::key::table_route::TableRouteValue;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
-use crate::metrics;
use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::{find_leaders, RegionRoute};
+use crate::{metrics, ClusterId};
pub struct CreateLogicalTablesProcedure {
    pub context: DdlContext,
@@ -49,6 +49,7 @@ impl CreateLogicalTablesProcedure {
    pub const TYPE_NAME: &'static str = "metasrv-procedure::CreateLogicalTables";
    pub fn new(
+        cluster_id: ClusterId,
        tasks: Vec<CreateTableTask>,
        physical_table_id: TableId,
        context: DdlContext,
@@ -56,6 +57,7 @@ impl CreateLogicalTablesProcedure {
        Self {
            context,
            data: CreateTablesData {
+                cluster_id,
                state: CreateTablesState::Prepare,
                tasks,
                table_ids_already_exists: vec![],
@@ -243,6 +245,7 @@ impl Procedure for CreateLogicalTablesProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub struct CreateTablesData {
+    cluster_id: ClusterId,
    state: CreateTablesState,
    tasks: Vec<CreateTableTask>,
    table_ids_already_exists: Vec<Option<TableId>>,

View File

@@ -37,17 +37,17 @@ use crate::ddl::utils::{
    add_peer_context_if_needed, convert_region_routes_to_detecting_regions, handle_retry_error,
    region_storage_path,
};
-use crate::ddl::{DdlContext, TableMetadata};
+use crate::ddl::{DdlContext, TableMetadata, TableMetadataAllocatorContext};
use crate::error::{self, Result};
use crate::key::table_name::TableNameKey;
use crate::key::table_route::{PhysicalTableRouteValue, TableRouteValue};
use crate::lock_key::{CatalogLock, SchemaLock, TableNameLock};
-use crate::metrics;
use crate::region_keeper::OperatingRegionGuard;
use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::{
    find_leader_regions, find_leaders, operating_leader_regions, RegionRoute,
};
+use crate::{metrics, ClusterId};
pub struct CreateTableProcedure {
    pub context: DdlContext,
    pub creator: TableCreator,
@@ -56,10 +56,10 @@ pub struct CreateTableProcedure {
impl CreateTableProcedure {
    pub const TYPE_NAME: &'static str = "metasrv-procedure::CreateTable";
-    pub fn new(task: CreateTableTask, context: DdlContext) -> Self {
+    pub fn new(cluster_id: ClusterId, task: CreateTableTask, context: DdlContext) -> Self {
        Self {
            context,
-            creator: TableCreator::new(task),
+            creator: TableCreator::new(cluster_id, task),
        }
    }
@@ -154,7 +154,12 @@ impl CreateTableProcedure {
        } = self
            .context
            .table_metadata_allocator
-            .create(&self.creator.data.task)
+            .create(
+                &TableMetadataAllocatorContext {
+                    cluster_id: self.creator.data.cluster_id,
+                },
+                &self.creator.data.task,
+            )
            .await?;
        self.creator
            .set_allocated_metadata(table_id, table_route, region_wal_options);
@@ -263,6 +268,7 @@ impl CreateTableProcedure {
    /// - Failed to create table metadata.
    async fn on_create_metadata(&mut self) -> Result<Status> {
        let table_id = self.table_id();
+        let cluster_id = self.creator.data.cluster_id;
        let manager = &self.context.table_metadata_manager;
        let raw_table_info = self.table_info().clone();
@@ -270,8 +276,10 @@ impl CreateTableProcedure {
        let region_wal_options = self.region_wal_options()?.clone();
        // Safety: the table_route must be allocated.
        let physical_table_route = self.table_route()?.clone();
-        let detecting_regions =
-            convert_region_routes_to_detecting_regions(&physical_table_route.region_routes);
+        let detecting_regions = convert_region_routes_to_detecting_regions(
+            cluster_id,
+            &physical_table_route.region_routes,
+        );
        let table_route = TableRouteValue::Physical(physical_table_route);
        manager
            .create_table_metadata(raw_table_info, table_route, region_wal_options)
@@ -343,10 +351,11 @@ pub struct TableCreator {
}
impl TableCreator {
-    pub fn new(task: CreateTableTask) -> Self {
+    pub fn new(cluster_id: ClusterId, task: CreateTableTask) -> Self {
        Self {
            data: CreateTableData {
                state: CreateTableState::Prepare,
+                cluster_id,
                task,
                table_route: None,
                region_wal_options: None,
@@ -412,6 +421,7 @@ pub struct CreateTableData {
    table_route: Option<PhysicalTableRouteValue>,
    /// None stands for not allocated yet.
    pub region_wal_options: Option<HashMap<RegionNumber, String>>,
+    pub cluster_id: ClusterId,
}
impl CreateTableData {

View File

@@ -24,13 +24,13 @@ use table::table_reference::TableReference;
use crate::cache_invalidator::Context;
use crate::ddl::utils::handle_retry_error;
-use crate::ddl::{DdlContext, TableMetadata};
+use crate::ddl::{DdlContext, TableMetadata, TableMetadataAllocatorContext};
use crate::error::{self, Result};
use crate::instruction::CacheIdent;
use crate::key::table_name::TableNameKey;
use crate::lock_key::{CatalogLock, SchemaLock, TableNameLock};
-use crate::metrics;
use crate::rpc::ddl::CreateViewTask;
+use crate::{metrics, ClusterId};
// The procedure to execute `[CreateViewTask]`.
pub struct CreateViewProcedure {
@@ -41,11 +41,12 @@ pub struct CreateViewProcedure {
impl CreateViewProcedure {
    pub const TYPE_NAME: &'static str = "metasrv-procedure::CreateView";
-    pub fn new(task: CreateViewTask, context: DdlContext) -> Self {
+    pub fn new(cluster_id: ClusterId, task: CreateViewTask, context: DdlContext) -> Self {
        Self {
            context,
            data: CreateViewData {
                state: CreateViewState::Prepare,
+                cluster_id,
                task,
                need_update: false,
            },
@@ -143,7 +144,12 @@ impl CreateViewProcedure {
            let TableMetadata { table_id, .. } = self
                .context
                .table_metadata_allocator
-                .create_view(&None)
+                .create_view(
+                    &TableMetadataAllocatorContext {
+                        cluster_id: self.data.cluster_id,
+                    },
+                    &None,
+                )
                .await?;
            self.data.set_allocated_metadata(table_id, false);
        }
@@ -279,6 +285,7 @@ pub enum CreateViewState {
pub struct CreateViewData {
    pub state: CreateViewState,
    pub task: CreateViewTask,
+    pub cluster_id: ClusterId,
    /// Whether to update the view info.
    pub need_update: bool,
}

View File

@@ -35,6 +35,7 @@ use crate::ddl::DdlContext;
use crate::error::Result;
use crate::key::table_name::TableNameValue;
use crate::lock_key::{CatalogLock, SchemaLock};
+use crate::ClusterId;
pub struct DropDatabaseProcedure {
    /// The context of procedure runtime.
@@ -53,6 +54,7 @@ pub(crate) enum DropTableTarget {
/// Context of [DropDatabaseProcedure] execution.
pub(crate) struct DropDatabaseContext {
+    cluster_id: ClusterId,
    catalog: String,
    schema: String,
    drop_if_exists: bool,
@@ -85,6 +87,7 @@ impl DropDatabaseProcedure {
        Self {
            runtime_context: context,
            context: DropDatabaseContext {
+                cluster_id: 0,
                catalog,
                schema,
                drop_if_exists,
@@ -105,6 +108,7 @@ impl DropDatabaseProcedure {
        Ok(Self {
            runtime_context,
            context: DropDatabaseContext {
+                cluster_id: 0,
                catalog,
                schema,
                drop_if_exists,

View File

@@ -217,10 +217,11 @@ mod tests {
    async fn test_next_without_logical_tables() {
        let node_manager = Arc::new(MockDatanodeManager::new(()));
        let ddl_context = new_ddl_context(node_manager);
-        create_physical_table(&ddl_context, "phy").await;
+        create_physical_table(&ddl_context, 0, "phy").await;
        // It always starts from Logical
        let mut state = DropDatabaseCursor::new(DropTableTarget::Logical);
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            drop_if_exists: false,
@@ -251,11 +252,12 @@ mod tests {
    async fn test_next_with_logical_tables() {
        let node_manager = Arc::new(MockDatanodeManager::new(()));
        let ddl_context = new_ddl_context(node_manager);
-        let physical_table_id = create_physical_table(&ddl_context, "phy").await;
-        create_logical_table(ddl_context.clone(), physical_table_id, "metric_0").await;
+        let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
+        create_logical_table(ddl_context.clone(), 0, physical_table_id, "metric_0").await;
        // It always starts from Logical
        let mut state = DropDatabaseCursor::new(DropTableTarget::Logical);
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            drop_if_exists: false,
@@ -284,6 +286,7 @@ mod tests {
        let ddl_context = new_ddl_context(node_manager);
        let mut state = DropDatabaseCursor::new(DropTableTarget::Physical);
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            drop_if_exists: false,

View File

@@ -98,10 +98,11 @@ impl State for DropDatabaseExecutor {
    async fn next(
        &mut self,
        ddl_ctx: &DdlContext,
-        _ctx: &mut DropDatabaseContext,
+        ctx: &mut DropDatabaseContext,
    ) -> Result<(Box<dyn State>, Status)> {
        self.register_dropping_regions(ddl_ctx)?;
-        let executor = DropTableExecutor::new(self.table_name.clone(), self.table_id, true);
+        let executor =
+            DropTableExecutor::new(ctx.cluster_id, self.table_name.clone(), self.table_id, true);
        // Deletes metadata for table permanently.
        let table_route_value = TableRouteValue::new(
            self.table_id,
@@ -186,7 +187,7 @@ mod tests {
    async fn test_next_with_physical_table() {
        let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
        let ddl_context = new_ddl_context(node_manager);
-        let physical_table_id = create_physical_table(&ddl_context, "phy").await;
+        let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
        let (_, table_route) = ddl_context
            .table_metadata_manager
            .table_route_manager()
@@ -202,6 +203,7 @@ mod tests {
            DropTableTarget::Physical,
        );
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            drop_if_exists: false,
@@ -214,6 +216,7 @@ mod tests {
        }
        // Execute again
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            drop_if_exists: false,
@@ -236,8 +239,8 @@ mod tests {
    async fn test_next_logical_table() {
        let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
        let ddl_context = new_ddl_context(node_manager);
-        let physical_table_id = create_physical_table(&ddl_context, "phy").await;
-        create_logical_table(ddl_context.clone(), physical_table_id, "metric").await;
+        let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
+        create_logical_table(ddl_context.clone(), 0, physical_table_id, "metric").await;
        let logical_table_id = physical_table_id + 1;
        let (_, table_route) = ddl_context
            .table_metadata_manager
@@ -254,6 +257,7 @@ mod tests {
            DropTableTarget::Logical,
        );
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            drop_if_exists: false,
@@ -266,6 +270,7 @@ mod tests {
        }
        // Execute again
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            drop_if_exists: false,
@@ -340,7 +345,7 @@ mod tests {
    async fn test_next_retryable_err() {
        let node_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler));
        let ddl_context = new_ddl_context(node_manager);
-        let physical_table_id = create_physical_table(&ddl_context, "phy").await;
+        let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
        let (_, table_route) = ddl_context
            .table_metadata_manager
            .table_route_manager()
@@ -355,6 +360,7 @@ mod tests {
            DropTableTarget::Physical,
        );
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            drop_if_exists: false,
@@ -368,7 +374,7 @@ mod tests {
    async fn test_on_recovery() {
        let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
        let ddl_context = new_ddl_context(node_manager);
-        let physical_table_id = create_physical_table(&ddl_context, "phy").await;
+        let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
        let (_, table_route) = ddl_context
            .table_metadata_manager
            .table_route_manager()
@@ -384,6 +390,7 @@ mod tests {
            DropTableTarget::Physical,
        );
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            drop_if_exists: false,

View File

@@ -118,6 +118,7 @@ mod tests {
            .unwrap();
        let mut state = DropDatabaseRemoveMetadata;
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: "foo".to_string(),
            schema: "bar".to_string(),
            drop_if_exists: true,
@@ -144,6 +145,7 @@ mod tests {
        // Schema not exists
        let mut state = DropDatabaseRemoveMetadata;
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: "foo".to_string(),
            schema: "bar".to_string(),
            drop_if_exists: true,

View File

@@ -89,6 +89,7 @@ mod tests {
        let ddl_context = new_ddl_context(node_manager);
        let mut step = DropDatabaseStart;
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: "foo".to_string(),
            schema: "bar".to_string(),
            drop_if_exists: false,
@@ -104,6 +105,7 @@ mod tests {
        let ddl_context = new_ddl_context(node_manager);
        let mut state = DropDatabaseStart;
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: "foo".to_string(),
            schema: "bar".to_string(),
            drop_if_exists: true,
@@ -126,6 +128,7 @@ mod tests {
            .unwrap();
        let mut state = DropDatabaseStart;
        let mut ctx = DropDatabaseContext {
+            cluster_id: 0,
            catalog: "foo".to_string(),
            schema: "bar".to_string(),
            drop_if_exists: false,

View File

@@ -37,8 +37,8 @@ use crate::instruction::{CacheIdent, DropFlow};
use crate::key::flow::flow_info::FlowInfoValue;
use crate::key::flow::flow_route::FlowRouteValue;
use crate::lock_key::{CatalogLock, FlowLock};
-use crate::metrics;
use crate::rpc::ddl::DropFlowTask;
+use crate::{metrics, ClusterId};
/// The procedure for dropping a flow.
pub struct DropFlowProcedure {
@@ -51,11 +51,12 @@ pub struct DropFlowProcedure {
impl DropFlowProcedure {
    pub const TYPE_NAME: &'static str = "metasrv-procedure::DropFlow";
-    pub fn new(task: DropFlowTask, context: DdlContext) -> Self {
+    pub fn new(cluster_id: ClusterId, task: DropFlowTask, context: DdlContext) -> Self {
        Self {
            context,
            data: DropFlowData {
                state: DropFlowState::Prepare,
+                cluster_id,
                task,
                flow_info_value: None,
                flow_route_values: vec![],
@@ -217,6 +218,7 @@ impl Procedure for DropFlowProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct DropFlowData {
    state: DropFlowState,
+    cluster_id: ClusterId,
    task: DropFlowTask,
    pub(crate) flow_info_value: Option<FlowInfoValue>,
    pub(crate) flow_route_values: Vec<FlowRouteValue>,

View File

@@ -40,10 +40,10 @@ use crate::ddl::DdlContext;
use crate::error::{self, Result};
use crate::key::table_route::TableRouteValue;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
-use crate::metrics;
use crate::region_keeper::OperatingRegionGuard;
use crate::rpc::ddl::DropTableTask;
use crate::rpc::router::{operating_leader_regions, RegionRoute};
+use crate::{metrics, ClusterId};
pub struct DropTableProcedure {
    /// The context of procedure runtime.
@@ -59,8 +59,8 @@ pub struct DropTableProcedure {
impl DropTableProcedure {
    pub const TYPE_NAME: &'static str = "metasrv-procedure::DropTable";
-    pub fn new(task: DropTableTask, context: DdlContext) -> Self {
-        let data = DropTableData::new(task);
+    pub fn new(cluster_id: ClusterId, task: DropTableTask, context: DdlContext) -> Self {
+        let data = DropTableData::new(cluster_id, task);
        let executor = data.build_executor();
        Self {
            context,
@@ -268,6 +268,7 @@ impl Procedure for DropTableProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub struct DropTableData {
    pub state: DropTableState,
+    pub cluster_id: ClusterId,
    pub task: DropTableTask,
    pub physical_region_routes: Vec<RegionRoute>,
    pub physical_table_id: Option<TableId>,
@@ -278,9 +279,10 @@ pub struct DropTableData {
}
impl DropTableData {
-    pub fn new(task: DropTableTask) -> Self {
+    pub fn new(cluster_id: ClusterId, task: DropTableTask) -> Self {
        Self {
            state: DropTableState::Prepare,
+            cluster_id,
            task,
            physical_region_routes: vec![],
            physical_table_id: None,
@@ -299,6 +301,7 @@ impl DropTableData {
    fn build_executor(&self) -> DropTableExecutor {
        DropTableExecutor::new(
+            self.cluster_id,
            self.task.table_name(),
            self.task.table_id,
            self.task.drop_if_exists,

View File

@@ -36,6 +36,7 @@ use crate::instruction::CacheIdent;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
+use crate::ClusterId;
/// [Control] indicated to the caller whether to go to the next step.
#[derive(Debug)]
@@ -53,8 +54,14 @@ impl<T> Control<T> {
impl DropTableExecutor {
    /// Returns the [DropTableExecutor].
-    pub fn new(table: TableName, table_id: TableId, drop_if_exists: bool) -> Self {
+    pub fn new(
+        cluster_id: ClusterId,
+        table: TableName,
+        table_id: TableId,
+        drop_if_exists: bool,
+    ) -> Self {
        Self {
+            cluster_id,
            table,
            table_id,
            drop_if_exists,
@@ -67,6 +74,7 @@ impl DropTableExecutor {
/// - Invalidates the cache on the Frontend nodes.
/// - Drops the regions on the Datanode nodes.
pub struct DropTableExecutor {
+    cluster_id: ClusterId,
    table: TableName,
    table_id: TableId,
    drop_if_exists: bool,
@@ -156,7 +164,7 @@ impl DropTableExecutor {
        let detecting_regions = if table_route_value.is_physical() {
            // Safety: checked.
            let regions = table_route_value.region_routes().unwrap();
-            convert_region_routes_to_detecting_regions(regions)
+            convert_region_routes_to_detecting_regions(self.cluster_id, regions)
        } else {
            vec![]
        };
@@ -313,6 +321,7 @@ mod tests {
        let node_manager = Arc::new(MockDatanodeManager::new(()));
        let ctx = new_ddl_context(node_manager);
        let executor = DropTableExecutor::new(
+            0,
            TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "my_table"),
            1024,
            true,
@@ -322,6 +331,7 @@ mod tests {
        // Drops a non-exists table
        let executor = DropTableExecutor::new(
+            0,
            TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "my_table"),
            1024,
            false,
@@ -331,6 +341,7 @@ mod tests {
        // Drops a exists table
        let executor = DropTableExecutor::new(
+            0,
            TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "my_table"),
            1024,
            false,

View File

@@ -31,8 +31,8 @@ use crate::error::{self, Result};
use crate::instruction::CacheIdent;
use crate::key::table_name::TableNameKey;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
-use crate::metrics;
use crate::rpc::ddl::DropViewTask;
+use crate::{metrics, ClusterId};
/// The procedure for dropping a view.
pub struct DropViewProcedure {
@@ -45,11 +45,12 @@ pub struct DropViewProcedure {
impl DropViewProcedure {
    pub const TYPE_NAME: &'static str = "metasrv-procedure::DropView";
-    pub fn new(task: DropViewTask, context: DdlContext) -> Self {
+    pub fn new(cluster_id: ClusterId, task: DropViewTask, context: DdlContext) -> Self {
        Self {
            context,
            data: DropViewData {
                state: DropViewState::Prepare,
+                cluster_id,
                task,
            },
        }
@@ -215,6 +216,7 @@ impl Procedure for DropViewProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct DropViewData {
    state: DropViewState,
+    cluster_id: ClusterId,
    task: DropViewTask,
}

View File

@@ -20,6 +20,7 @@ use crate::error::Result;
use crate::key::FlowId;
use crate::peer::Peer;
use crate::sequence::SequenceRef;
+use crate::ClusterId;
/// The reference of [FlowMetadataAllocator].
pub type FlowMetadataAllocatorRef = Arc<FlowMetadataAllocator>;
@@ -59,9 +60,16 @@ impl FlowMetadataAllocator {
    }
    /// Allocates the [FlowId] and [Peer]s.
-    pub async fn create(&self, partitions: usize) -> Result<(FlowId, Vec<Peer>)> {
+    pub async fn create(
+        &self,
+        cluster_id: ClusterId,
+        partitions: usize,
+    ) -> Result<(FlowId, Vec<Peer>)> {
        let flow_id = self.allocate_flow_id().await?;
-        let peers = self.partition_peer_allocator.alloc(partitions).await?;
+        let peers = self
+            .partition_peer_allocator
+            .alloc(cluster_id, partitions)
+            .await?;
        Ok((flow_id, peers))
    }
@@ -71,7 +79,7 @@ impl FlowMetadataAllocator {
#[async_trait]
pub trait PartitionPeerAllocator: Send + Sync {
    /// Allocates [Peer] nodes for storing partitions.
-    async fn alloc(&self, partitions: usize) -> Result<Vec<Peer>>;
+    async fn alloc(&self, cluster_id: ClusterId, partitions: usize) -> Result<Vec<Peer>>;
}
/// [PartitionPeerAllocatorRef] allocates [Peer]s for partitions.
@@ -81,7 +89,7 @@ struct NoopPartitionPeerAllocator;
#[async_trait]
impl PartitionPeerAllocator for NoopPartitionPeerAllocator {
-    async fn alloc(&self, partitions: usize) -> Result<Vec<Peer>> {
+    async fn alloc(&self, _cluster_id: ClusterId, partitions: usize) -> Result<Vec<Peer>> {
        Ok(vec![Peer::default(); partitions])
    }
}
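
With this change, `FlowMetadataAllocator::create` and every `PartitionPeerAllocator` implementor receive the requesting `ClusterId` alongside the partition count. Below is a minimal, hypothetical sketch of a cluster-aware allocator written against the new trait signature; `FixedPeerAllocator` and its fixed peer pool are illustrative assumptions only and are not part of this diff.

```rust
use async_trait::async_trait;

use crate::error::Result;
use crate::peer::Peer;
use crate::ClusterId;

/// Hypothetical allocator that hands out peers from a fixed list,
/// ignoring which cluster asked (mirrors NoopPartitionPeerAllocator).
struct FixedPeerAllocator {
    peers: Vec<Peer>,
}

#[async_trait]
impl PartitionPeerAllocator for FixedPeerAllocator {
    async fn alloc(&self, _cluster_id: ClusterId, partitions: usize) -> Result<Vec<Peer>> {
        // Cycle through the configured peers until every partition has one.
        Ok(self.peers.iter().cycle().take(partitions).cloned().collect())
    }
}
```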

View File

@@ -20,7 +20,7 @@ use common_telemetry::{debug, info};
use snafu::ensure;
use store_api::storage::{RegionId, RegionNumber, TableId};
-use crate::ddl::TableMetadata;
+use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
use crate::error::{self, Result, UnsupportedSnafu};
use crate::key::table_route::PhysicalTableRouteValue;
use crate::peer::Peer;
@@ -109,6 +109,7 @@ impl TableMetadataAllocator {
    async fn create_table_route(
        &self,
+        ctx: &TableMetadataAllocatorContext,
        table_id: TableId,
        task: &CreateTableTask,
    ) -> Result<PhysicalTableRouteValue> {
@@ -120,7 +121,7 @@ impl TableMetadataAllocator {
            }
        );
-        let peers = self.peer_allocator.alloc(regions).await?;
+        let peers = self.peer_allocator.alloc(ctx, regions).await?;
        let region_routes = task
            .partitions
            .iter()
@@ -146,7 +147,11 @@ impl TableMetadataAllocator {
    }
    /// Create VIEW metadata
-    pub async fn create_view(&self, table_id: &Option<api::v1::TableId>) -> Result<TableMetadata> {
+    pub async fn create_view(
+        &self,
+        _ctx: &TableMetadataAllocatorContext,
+        table_id: &Option<api::v1::TableId>,
+    ) -> Result<TableMetadata> {
        let table_id = self.allocate_table_id(table_id).await?;
        Ok(TableMetadata {
@@ -155,9 +160,13 @@ impl TableMetadataAllocator {
        })
    }
-    pub async fn create(&self, task: &CreateTableTask) -> Result<TableMetadata> {
+    pub async fn create(
+        &self,
+        ctx: &TableMetadataAllocatorContext,
+        task: &CreateTableTask,
+    ) -> Result<TableMetadata> {
        let table_id = self.allocate_table_id(&task.create_table.table_id).await?;
-        let table_route = self.create_table_route(table_id, task).await?;
+        let table_route = self.create_table_route(ctx, table_id, task).await?;
        let region_wal_options = self.create_wal_options(&table_route)?;
        debug!(
@@ -179,14 +188,19 @@ pub type PeerAllocatorRef = Arc<dyn PeerAllocator>;
#[async_trait]
pub trait PeerAllocator: Send + Sync {
    /// Allocates `regions` size [`Peer`]s.
-    async fn alloc(&self, regions: usize) -> Result<Vec<Peer>>;
+    async fn alloc(&self, ctx: &TableMetadataAllocatorContext, regions: usize)
+        -> Result<Vec<Peer>>;
}
struct NoopPeerAllocator;
#[async_trait]
impl PeerAllocator for NoopPeerAllocator {
-    async fn alloc(&self, regions: usize) -> Result<Vec<Peer>> {
+    async fn alloc(
+        &self,
+        _ctx: &TableMetadataAllocatorContext,
+        regions: usize,
+    ) -> Result<Vec<Peer>> {
        Ok(vec![Peer::default(); regions])
    }
}
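
`PeerAllocator::alloc` now threads a `TableMetadataAllocatorContext` (carrying the `cluster_id`) through every region-peer allocation, mirroring the flow-side change above. A hypothetical implementor under the new signature might look like the sketch below; `PoolPeerAllocator` and its static pool are assumptions for illustration, not code from this change.

```rust
use async_trait::async_trait;

use crate::ddl::TableMetadataAllocatorContext;
use crate::error::Result;
use crate::peer::Peer;

/// Hypothetical allocator that picks peers from a static pool and only
/// uses the context to report which cluster requested the regions.
struct PoolPeerAllocator {
    pool: Vec<Peer>,
}

#[async_trait]
impl PeerAllocator for PoolPeerAllocator {
    async fn alloc(
        &self,
        ctx: &TableMetadataAllocatorContext,
        regions: usize,
    ) -> Result<Vec<Peer>> {
        common_telemetry::debug!(
            "allocating {} region peers for cluster {}",
            regions,
            ctx.cluster_id
        );
        Ok(self.pool.iter().cycle().take(regions).cloned().collect())
    }
}
```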

View File

@@ -31,9 +31,10 @@ use crate::ddl::test_util::columns::TestColumnDefBuilder;
use crate::ddl::test_util::create_table::{
    build_raw_table_info_from_expr, TestCreateTableExprBuilder,
};
-use crate::ddl::{DdlContext, TableMetadata};
+use crate::ddl::{DdlContext, TableMetadata, TableMetadataAllocatorContext};
use crate::key::table_route::TableRouteValue;
use crate::rpc::ddl::CreateTableTask;
+use crate::ClusterId;
pub async fn create_physical_table_metadata(
    ddl_context: &DdlContext,
@@ -47,7 +48,11 @@ pub async fn create_physical_table_metadata(
        .unwrap();
}
-pub async fn create_physical_table(ddl_context: &DdlContext, name: &str) -> TableId {
+pub async fn create_physical_table(
+    ddl_context: &DdlContext,
+    cluster_id: ClusterId,
+    name: &str,
+) -> TableId {
    // Prepares physical table metadata.
    let mut create_physical_table_task = test_create_physical_table_task(name);
    let TableMetadata {
@@ -56,7 +61,10 @@ pub async fn create_physical_table(ddl_context: &DdlContext, name: &str) -> Tabl
        ..
    } = ddl_context
        .table_metadata_allocator
-        .create(&create_physical_table_task)
+        .create(
+            &TableMetadataAllocatorContext { cluster_id },
+            &create_physical_table_task,
+        )
        .await
        .unwrap();
    create_physical_table_task.set_table_id(table_id);
@@ -72,13 +80,15 @@ pub async fn create_physical_table(ddl_context: &DdlContext, name: &str) -> Tabl
pub async fn create_logical_table(
    ddl_context: DdlContext,
+    cluster_id: ClusterId,
    physical_table_id: TableId,
    table_name: &str,
) -> TableId {
    use std::assert_matches::assert_matches;
    let tasks = vec![test_create_logical_table_task(table_name)];
-    let mut procedure = CreateLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context);
+    let mut procedure =
+        CreateLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
    let status = procedure.on_prepare().await.unwrap();
    assert_matches!(status, Status::Executing { persist: true });
    let status = procedure.on_create_metadata().await.unwrap();

View File

@@ -86,6 +86,7 @@ fn make_alter_logical_table_rename_task(
async fn test_on_prepare_check_schema() { async fn test_on_prepare_check_schema() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let tasks = vec![ let tasks = vec![
make_alter_logical_table_add_column_task( make_alter_logical_table_add_column_task(
Some("schema1"), Some("schema1"),
@@ -99,7 +100,8 @@ async fn test_on_prepare_check_schema() {
), ),
]; ];
let physical_table_id = 1024u32; let physical_table_id = 1024u32;
let mut procedure = AlterLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context); let mut procedure =
AlterLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, AlterLogicalTablesInvalidArguments { .. }); assert_matches!(err, AlterLogicalTablesInvalidArguments { .. });
} }
@@ -108,46 +110,50 @@ async fn test_on_prepare_check_schema() {
async fn test_on_prepare_check_alter_kind() { async fn test_on_prepare_check_alter_kind() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let tasks = vec![make_alter_logical_table_rename_task( let tasks = vec![make_alter_logical_table_rename_task(
"schema1", "schema1",
"table1", "table1",
"new_table1", "new_table1",
)]; )];
let physical_table_id = 1024u32; let physical_table_id = 1024u32;
let mut procedure = AlterLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context); let mut procedure =
AlterLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, AlterLogicalTablesInvalidArguments { .. }); assert_matches!(err, AlterLogicalTablesInvalidArguments { .. });
} }
#[tokio::test] #[tokio::test]
async fn test_on_prepare_different_physical_table() { async fn test_on_prepare_different_physical_table() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let phy1_id = create_physical_table(&ddl_context, "phy1").await; let phy1_id = create_physical_table(&ddl_context, cluster_id, "phy1").await;
create_logical_table(ddl_context.clone(), phy1_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy1_id, "table1").await;
let phy2_id = create_physical_table(&ddl_context, "phy2").await; let phy2_id = create_physical_table(&ddl_context, cluster_id, "phy2").await;
create_logical_table(ddl_context.clone(), phy2_id, "table2").await; create_logical_table(ddl_context.clone(), cluster_id, phy2_id, "table2").await;
let tasks = vec![ let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["column1".to_string()]), make_alter_logical_table_add_column_task(None, "table1", vec!["column1".to_string()]),
make_alter_logical_table_add_column_task(None, "table2", vec!["column2".to_string()]), make_alter_logical_table_add_column_task(None, "table2", vec!["column2".to_string()]),
]; ];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy1_id, ddl_context); let mut procedure = AlterLogicalTablesProcedure::new(cluster_id, tasks, phy1_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, AlterLogicalTablesInvalidArguments { .. }); assert_matches!(err, AlterLogicalTablesInvalidArguments { .. });
} }
#[tokio::test] #[tokio::test]
async fn test_on_prepare_logical_table_not_exists() { async fn test_on_prepare_logical_table_not_exists() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
// Creates physical table // Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await; let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables // Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
let tasks = vec![ let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["column1".to_string()]), make_alter_logical_table_add_column_task(None, "table1", vec!["column1".to_string()]),
@@ -155,22 +161,23 @@ async fn test_on_prepare_logical_table_not_exists() {
make_alter_logical_table_add_column_task(None, "table2", vec!["column2".to_string()]), make_alter_logical_table_add_column_task(None, "table2", vec!["column2".to_string()]),
]; ];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context); let mut procedure = AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, TableNotFound { .. }); assert_matches!(err, TableNotFound { .. });
} }
#[tokio::test] #[tokio::test]
async fn test_on_prepare() { async fn test_on_prepare() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
// Creates physical table // Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await; let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables // Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
create_logical_table(ddl_context.clone(), phy_id, "table3").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table3").await;
let tasks = vec![ let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["column1".to_string()]), make_alter_logical_table_add_column_task(None, "table1", vec!["column1".to_string()]),
@@ -178,24 +185,25 @@ async fn test_on_prepare() {
make_alter_logical_table_add_column_task(None, "table3", vec!["column3".to_string()]), make_alter_logical_table_add_column_task(None, "table3", vec!["column3".to_string()]),
]; ];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context); let mut procedure = AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context);
let result = procedure.on_prepare().await; let result = procedure.on_prepare().await;
assert_matches!(result, Ok(Status::Executing { persist: true })); assert_matches!(result, Ok(Status::Executing { persist: true }));
} }
#[tokio::test] #[tokio::test]
async fn test_on_update_metadata() { async fn test_on_update_metadata() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
// Creates physical table // Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await; let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables // Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
create_logical_table(ddl_context.clone(), phy_id, "table3").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table3").await;
create_logical_table(ddl_context.clone(), phy_id, "table4").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table4").await;
create_logical_table(ddl_context.clone(), phy_id, "table5").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table5").await;
let tasks = vec![ let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["new_col".to_string()]), make_alter_logical_table_add_column_task(None, "table1", vec!["new_col".to_string()]),
@@ -203,7 +211,7 @@ async fn test_on_update_metadata() {
make_alter_logical_table_add_column_task(None, "table3", vec!["new_col".to_string()]), make_alter_logical_table_add_column_task(None, "table3", vec!["new_col".to_string()]),
]; ];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context); let mut procedure = AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context);
let mut status = procedure.on_prepare().await.unwrap(); let mut status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true }); assert_matches!(status, Status::Executing { persist: true });
@@ -221,21 +229,23 @@ async fn test_on_update_metadata() {
#[tokio::test] #[tokio::test]
async fn test_on_part_duplicate_alter_request() { async fn test_on_part_duplicate_alter_request() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
// Creates physical table // Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await; let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables // Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
let tasks = vec![ let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["col_0".to_string()]), make_alter_logical_table_add_column_task(None, "table1", vec!["col_0".to_string()]),
make_alter_logical_table_add_column_task(None, "table2", vec!["col_0".to_string()]), make_alter_logical_table_add_column_task(None, "table2", vec!["col_0".to_string()]),
]; ];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context.clone()); let mut procedure =
AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context.clone());
let mut status = procedure.on_prepare().await.unwrap(); let mut status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true }); assert_matches!(status, Status::Executing { persist: true });
@@ -268,7 +278,8 @@ async fn test_on_part_duplicate_alter_request() {
), ),
]; ];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context.clone()); let mut procedure =
AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context.clone());
let mut status = procedure.on_prepare().await.unwrap(); let mut status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true }); assert_matches!(status, Status::Executing { persist: true });
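
For orientation: every hunk in the file above pairs two variants of the same constructor call, differing only in whether an explicit cluster_id (the tests use 1) is threaded through as the first argument. A minimal sketch of the cluster_id variant, using only names that appear in this diff; the crate-internal test helpers are assumed, so the snippet is not runnable outside that test module:

    let cluster_id = 1;
    let tasks = vec![
        make_alter_logical_table_add_column_task(None, "table1", vec!["new_col".to_string()]),
    ];
    // `phy_id` comes from create_physical_table(&ddl_context, cluster_id, "phy").await,
    // as in the setup lines above.
    let mut procedure = AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context);
    let status = procedure.on_prepare().await.unwrap();
    assert_matches!(status, Status::Executing { persist: true });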

View File

@@ -59,6 +59,7 @@ fn test_rename_alter_table_task(table_name: &str, new_table_name: &str) -> Alter
async fn test_on_prepare_table_exists_err() { async fn test_on_prepare_table_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo", 1024); let task = test_create_table_task("foo", 1024);
// Puts a value to table name key. // Puts a value to table name key.
ddl_context ddl_context
@@ -72,7 +73,7 @@ async fn test_on_prepare_table_exists_err() {
.unwrap(); .unwrap();
let task = test_rename_alter_table_task("non-exists", "foo"); let task = test_rename_alter_table_task("non-exists", "foo");
let mut procedure = AlterTableProcedure::new(1024, task, ddl_context).unwrap(); let mut procedure = AlterTableProcedure::new(cluster_id, 1024, task, ddl_context).unwrap();
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err.status_code(), StatusCode::TableAlreadyExists); assert_matches!(err.status_code(), StatusCode::TableAlreadyExists);
} }
@@ -81,8 +82,9 @@ async fn test_on_prepare_table_exists_err() {
async fn test_on_prepare_table_not_exists_err() { async fn test_on_prepare_table_not_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_rename_alter_table_task("non-exists", "foo"); let task = test_rename_alter_table_task("non-exists", "foo");
let mut procedure = AlterTableProcedure::new(1024, task, ddl_context).unwrap(); let mut procedure = AlterTableProcedure::new(cluster_id, 1024, task, ddl_context).unwrap();
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err.status_code(), StatusCode::TableNotFound); assert_matches!(err.status_code(), StatusCode::TableNotFound);
} }
@@ -93,6 +95,7 @@ async fn test_on_submit_alter_request() {
let datanode_handler = DatanodeWatcher(tx); let datanode_handler = DatanodeWatcher(tx);
let node_manager = Arc::new(MockDatanodeManager::new(datanode_handler)); let node_manager = Arc::new(MockDatanodeManager::new(datanode_handler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_id = 1024; let table_id = 1024;
let table_name = "foo"; let table_name = "foo";
let task = test_create_table_task(table_name, table_id); let task = test_create_table_task(table_name, table_id);
@@ -141,7 +144,8 @@ async fn test_on_submit_alter_request() {
})), })),
}, },
}; };
let mut procedure = AlterTableProcedure::new(table_id, alter_table_task, ddl_context).unwrap(); let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, alter_table_task, ddl_context).unwrap();
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
procedure.submit_alter_region_requests().await.unwrap(); procedure.submit_alter_region_requests().await.unwrap();
@@ -177,6 +181,7 @@ async fn test_on_submit_alter_request_with_outdated_request() {
RequestOutdatedErrorDatanodeHandler, RequestOutdatedErrorDatanodeHandler,
)); ));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_id = 1024; let table_id = 1024;
let table_name = "foo"; let table_name = "foo";
let task = test_create_table_task(table_name, table_id); let task = test_create_table_task(table_name, table_id);
@@ -225,7 +230,8 @@ async fn test_on_submit_alter_request_with_outdated_request() {
})), })),
}, },
}; };
let mut procedure = AlterTableProcedure::new(table_id, alter_table_task, ddl_context).unwrap(); let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, alter_table_task, ddl_context).unwrap();
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
procedure.submit_alter_region_requests().await.unwrap(); procedure.submit_alter_region_requests().await.unwrap();
} }
@@ -234,6 +240,7 @@ async fn test_on_submit_alter_request_with_outdated_request() {
async fn test_on_update_metadata_rename() { async fn test_on_update_metadata_rename() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo"; let table_name = "foo";
let new_table_name = "bar"; let new_table_name = "bar";
let table_id = 1024; let table_id = 1024;
@@ -250,7 +257,8 @@ async fn test_on_update_metadata_rename() {
.unwrap(); .unwrap();
let task = test_rename_alter_table_task(table_name, new_table_name); let task = test_rename_alter_table_task(table_name, new_table_name);
let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context.clone()).unwrap(); let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
procedure.on_update_metadata().await.unwrap(); procedure.on_update_metadata().await.unwrap();
@@ -283,6 +291,7 @@ async fn test_on_update_metadata_rename() {
async fn test_on_update_metadata_add_columns() { async fn test_on_update_metadata_add_columns() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo"; let table_name = "foo";
let table_id = 1024; let table_id = 1024;
let task = test_create_table_task(table_name, table_id); let task = test_create_table_task(table_name, table_id);
@@ -326,7 +335,8 @@ async fn test_on_update_metadata_add_columns() {
})), })),
}, },
}; };
let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context.clone()).unwrap(); let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
procedure.submit_alter_region_requests().await.unwrap(); procedure.submit_alter_region_requests().await.unwrap();
procedure.on_update_metadata().await.unwrap(); procedure.on_update_metadata().await.unwrap();
@@ -351,6 +361,7 @@ async fn test_on_update_metadata_add_columns() {
async fn test_on_update_table_options() { async fn test_on_update_table_options() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo"; let table_name = "foo";
let table_id = 1024; let table_id = 1024;
let task = test_create_table_task(table_name, table_id); let task = test_create_table_task(table_name, table_id);
@@ -387,7 +398,8 @@ async fn test_on_update_table_options() {
})), })),
}, },
}; };
let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context.clone()).unwrap(); let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
procedure.submit_alter_region_requests().await.unwrap(); procedure.submit_alter_region_requests().await.unwrap();
procedure.on_update_metadata().await.unwrap(); procedure.on_update_metadata().await.unwrap();
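
The alter-table tests above all repeat the same scaffolding before constructing the procedure; only the datanode handler and the task differ per test. A condensed sketch of that shared setup, with the task construction (cut off in this view of the diff) left as a placeholder:

    let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
    let ddl_context = new_ddl_context(node_manager);
    let cluster_id = 1;
    let table_id = 1024;
    // `alter_table_task` stands in for the alter task each test builds;
    // its construction is not visible in these hunks.
    let mut procedure =
        AlterTableProcedure::new(cluster_id, table_id, alter_table_task, ddl_context).unwrap();
    procedure.on_prepare().await.unwrap();
    procedure.submit_alter_region_requests().await.unwrap();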

View File

@@ -25,11 +25,11 @@ use crate::ddl::create_flow::CreateFlowProcedure;
use crate::ddl::test_util::create_table::test_create_table_task; use crate::ddl::test_util::create_table::test_create_table_task;
use crate::ddl::test_util::flownode_handler::NaiveFlownodeHandler; use crate::ddl::test_util::flownode_handler::NaiveFlownodeHandler;
use crate::ddl::DdlContext; use crate::ddl::DdlContext;
use crate::error;
use crate::key::table_route::TableRouteValue; use crate::key::table_route::TableRouteValue;
use crate::key::FlowId; use crate::key::FlowId;
use crate::rpc::ddl::CreateFlowTask; use crate::rpc::ddl::CreateFlowTask;
use crate::test_util::{new_ddl_context, MockFlownodeManager}; use crate::test_util::{new_ddl_context, MockFlownodeManager};
use crate::{error, ClusterId};
pub(crate) fn test_create_flow_task( pub(crate) fn test_create_flow_task(
name: &str, name: &str,
@@ -53,6 +53,7 @@ pub(crate) fn test_create_flow_task(
#[tokio::test] #[tokio::test]
async fn test_create_flow_source_table_not_found() { async fn test_create_flow_source_table_not_found() {
let cluster_id = 1;
let source_table_names = vec![TableName::new( let source_table_names = vec![TableName::new(
DEFAULT_CATALOG_NAME, DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME, DEFAULT_SCHEMA_NAME,
@@ -64,13 +65,14 @@ async fn test_create_flow_source_table_not_found() {
let node_manager = Arc::new(MockFlownodeManager::new(NaiveFlownodeHandler)); let node_manager = Arc::new(MockFlownodeManager::new(NaiveFlownodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let query_ctx = QueryContext::arc().into(); let query_ctx = QueryContext::arc().into();
let mut procedure = CreateFlowProcedure::new(task, query_ctx, ddl_context); let mut procedure = CreateFlowProcedure::new(cluster_id, task, query_ctx, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, error::Error::TableNotFound { .. }); assert_matches!(err, error::Error::TableNotFound { .. });
} }
pub(crate) async fn create_test_flow( pub(crate) async fn create_test_flow(
ddl_context: &DdlContext, ddl_context: &DdlContext,
cluster_id: ClusterId,
flow_name: &str, flow_name: &str,
source_table_names: Vec<TableName>, source_table_names: Vec<TableName>,
sink_table_name: TableName, sink_table_name: TableName,
@@ -82,7 +84,8 @@ pub(crate) async fn create_test_flow(
false, false,
); );
let query_ctx = QueryContext::arc().into(); let query_ctx = QueryContext::arc().into();
let mut procedure = CreateFlowProcedure::new(task.clone(), query_ctx, ddl_context.clone()); let mut procedure =
CreateFlowProcedure::new(cluster_id, task.clone(), query_ctx, ddl_context.clone());
let output = execute_procedure_until_done(&mut procedure).await.unwrap(); let output = execute_procedure_until_done(&mut procedure).await.unwrap();
let flow_id = output.downcast_ref::<FlowId>().unwrap(); let flow_id = output.downcast_ref::<FlowId>().unwrap();
@@ -91,6 +94,7 @@ pub(crate) async fn create_test_flow(
#[tokio::test] #[tokio::test]
async fn test_create_flow() { async fn test_create_flow() {
let cluster_id = 1;
let table_id = 1024; let table_id = 1024;
let source_table_names = vec![TableName::new( let source_table_names = vec![TableName::new(
DEFAULT_CATALOG_NAME, DEFAULT_CATALOG_NAME,
@@ -114,6 +118,7 @@ async fn test_create_flow() {
.unwrap(); .unwrap();
let flow_id = create_test_flow( let flow_id = create_test_flow(
&ddl_context, &ddl_context,
cluster_id,
"my_flow", "my_flow",
source_table_names.clone(), source_table_names.clone(),
sink_table_name.clone(), sink_table_name.clone(),
@@ -129,7 +134,8 @@ async fn test_create_flow() {
true, true,
); );
let query_ctx = QueryContext::arc().into(); let query_ctx = QueryContext::arc().into();
let mut procedure = CreateFlowProcedure::new(task.clone(), query_ctx, ddl_context.clone()); let mut procedure =
CreateFlowProcedure::new(cluster_id, task.clone(), query_ctx, ddl_context.clone());
let output = execute_procedure_until_done(&mut procedure).await.unwrap(); let output = execute_procedure_until_done(&mut procedure).await.unwrap();
let flow_id = output.downcast_ref::<FlowId>().unwrap(); let flow_id = output.downcast_ref::<FlowId>().unwrap();
assert_eq!(*flow_id, 1024); assert_eq!(*flow_id, 1024);
@@ -137,7 +143,7 @@ async fn test_create_flow() {
// Creates again // Creates again
let task = test_create_flow_task("my_flow", source_table_names, sink_table_name, false); let task = test_create_flow_task("my_flow", source_table_names, sink_table_name, false);
let query_ctx = QueryContext::arc().into(); let query_ctx = QueryContext::arc().into();
let mut procedure = CreateFlowProcedure::new(task.clone(), query_ctx, ddl_context); let mut procedure = CreateFlowProcedure::new(cluster_id, task.clone(), query_ctx, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, error::Error::FlowAlreadyExists { .. }); assert_matches!(err, error::Error::FlowAlreadyExists { .. });
} }
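
The shared create_test_flow helper changes shape in the hunks above as well: it now takes the cluster id (typed ClusterId per the updated import) between the DDL context and the flow name, and forwards it into CreateFlowProcedure::new. A sketch of the updated call, assembled only from lines visible in this diff and assuming the surrounding test setup:

    use crate::{error, ClusterId};

    let cluster_id: ClusterId = 1;
    let flow_id = create_test_flow(
        &ddl_context,
        cluster_id,
        "my_flow",
        source_table_names.clone(),
        sink_table_name.clone(),
    )
    .await;
    // Creating the same flow again, as in the last hunk: prepare reports FlowAlreadyExists.
    let task = test_create_flow_task("my_flow", source_table_names, sink_table_name, false);
    let query_ctx = QueryContext::arc().into();
    let mut procedure = CreateFlowProcedure::new(cluster_id, task.clone(), query_ctx, ddl_context);
    let err = procedure.on_prepare().await.unwrap_err();
    assert_matches!(err, error::Error::FlowAlreadyExists { .. });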

View File

@@ -26,7 +26,7 @@ use crate::ddl::test_util::datanode_handler::NaiveDatanodeHandler;
use crate::ddl::test_util::{ use crate::ddl::test_util::{
create_physical_table_metadata, test_create_logical_table_task, test_create_physical_table_task, create_physical_table_metadata, test_create_logical_table_task, test_create_physical_table_task,
}; };
use crate::ddl::TableMetadata; use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
use crate::error::Error; use crate::error::Error;
use crate::key::table_route::TableRouteValue; use crate::key::table_route::TableRouteValue;
use crate::test_util::{new_ddl_context, MockDatanodeManager}; use crate::test_util::{new_ddl_context, MockDatanodeManager};
@@ -35,9 +35,11 @@ use crate::test_util::{new_ddl_context, MockDatanodeManager};
async fn test_on_prepare_physical_table_not_found() { async fn test_on_prepare_physical_table_not_found() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let tasks = vec![test_create_logical_table_task("foo")]; let tasks = vec![test_create_logical_table_task("foo")];
let physical_table_id = 1024u32; let physical_table_id = 1024u32;
let mut procedure = CreateLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context); let mut procedure =
CreateLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::TableRouteNotFound { .. }); assert_matches!(err, Error::TableRouteNotFound { .. });
} }
@@ -46,6 +48,7 @@ async fn test_on_prepare_physical_table_not_found() {
async fn test_on_prepare() { async fn test_on_prepare() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata. // Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table"); let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata { let TableMetadata {
@@ -54,7 +57,10 @@ async fn test_on_prepare() {
.. ..
} = ddl_context } = ddl_context
.table_metadata_allocator .table_metadata_allocator
.create(&create_physical_table_task) .create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await .await
.unwrap(); .unwrap();
create_physical_table_task.set_table_id(table_id); create_physical_table_task.set_table_id(table_id);
@@ -67,7 +73,8 @@ async fn test_on_prepare() {
// The create logical table procedure. // The create logical table procedure.
let tasks = vec![test_create_logical_table_task("foo")]; let tasks = vec![test_create_logical_table_task("foo")];
let physical_table_id = table_id; let physical_table_id = table_id;
let mut procedure = CreateLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context); let mut procedure =
CreateLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let status = procedure.on_prepare().await.unwrap(); let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true }); assert_matches!(status, Status::Executing { persist: true });
} }
@@ -76,6 +83,7 @@ async fn test_on_prepare() {
async fn test_on_prepare_logical_table_exists_err() { async fn test_on_prepare_logical_table_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata. // Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table"); let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata { let TableMetadata {
@@ -84,7 +92,10 @@ async fn test_on_prepare_logical_table_exists_err() {
.. ..
} = ddl_context } = ddl_context
.table_metadata_allocator .table_metadata_allocator
.create(&create_physical_table_task) .create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await .await
.unwrap(); .unwrap();
create_physical_table_task.set_table_id(table_id); create_physical_table_task.set_table_id(table_id);
@@ -108,7 +119,7 @@ async fn test_on_prepare_logical_table_exists_err() {
// The create logical table procedure. // The create logical table procedure.
let physical_table_id = table_id; let physical_table_id = table_id;
let mut procedure = let mut procedure =
CreateLogicalTablesProcedure::new(vec![task], physical_table_id, ddl_context); CreateLogicalTablesProcedure::new(cluster_id, vec![task], physical_table_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::TableAlreadyExists { .. }); assert_matches!(err, Error::TableAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists); assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
@@ -118,6 +129,7 @@ async fn test_on_prepare_logical_table_exists_err() {
async fn test_on_prepare_with_create_if_table_exists() { async fn test_on_prepare_with_create_if_table_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata. // Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table"); let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata { let TableMetadata {
@@ -126,7 +138,10 @@ async fn test_on_prepare_with_create_if_table_exists() {
.. ..
} = ddl_context } = ddl_context
.table_metadata_allocator .table_metadata_allocator
.create(&create_physical_table_task) .create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await .await
.unwrap(); .unwrap();
create_physical_table_task.set_table_id(table_id); create_physical_table_task.set_table_id(table_id);
@@ -152,7 +167,7 @@ async fn test_on_prepare_with_create_if_table_exists() {
// Sets `create_if_not_exists` // Sets `create_if_not_exists`
task.create_table.create_if_not_exists = true; task.create_table.create_if_not_exists = true;
let mut procedure = let mut procedure =
CreateLogicalTablesProcedure::new(vec![task], physical_table_id, ddl_context); CreateLogicalTablesProcedure::new(cluster_id, vec![task], physical_table_id, ddl_context);
let status = procedure.on_prepare().await.unwrap(); let status = procedure.on_prepare().await.unwrap();
let output = status.downcast_output_ref::<Vec<u32>>().unwrap(); let output = status.downcast_output_ref::<Vec<u32>>().unwrap();
assert_eq!(*output, vec![8192]); assert_eq!(*output, vec![8192]);
@@ -162,6 +177,7 @@ async fn test_on_prepare_with_create_if_table_exists() {
async fn test_on_prepare_part_logical_tables_exist() { async fn test_on_prepare_part_logical_tables_exist() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata. // Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table"); let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata { let TableMetadata {
@@ -170,7 +186,10 @@ async fn test_on_prepare_part_logical_tables_exist() {
.. ..
} = ddl_context } = ddl_context
.table_metadata_allocator .table_metadata_allocator
.create(&create_physical_table_task) .create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await .await
.unwrap(); .unwrap();
create_physical_table_task.set_table_id(table_id); create_physical_table_task.set_table_id(table_id);
@@ -197,6 +216,7 @@ async fn test_on_prepare_part_logical_tables_exist() {
task.create_table.create_if_not_exists = true; task.create_table.create_if_not_exists = true;
let non_exist_task = test_create_logical_table_task("non_exists"); let non_exist_task = test_create_logical_table_task("non_exists");
let mut procedure = CreateLogicalTablesProcedure::new( let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task, non_exist_task], vec![task, non_exist_task],
physical_table_id, physical_table_id,
ddl_context, ddl_context,
@@ -209,6 +229,7 @@ async fn test_on_prepare_part_logical_tables_exist() {
async fn test_on_create_metadata() { async fn test_on_create_metadata() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata. // Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table"); let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata { let TableMetadata {
@@ -217,7 +238,10 @@ async fn test_on_create_metadata() {
.. ..
} = ddl_context } = ddl_context
.table_metadata_allocator .table_metadata_allocator
.create(&create_physical_table_task) .create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await .await
.unwrap(); .unwrap();
create_physical_table_task.set_table_id(table_id); create_physical_table_task.set_table_id(table_id);
@@ -233,6 +257,7 @@ async fn test_on_create_metadata() {
let task = test_create_logical_table_task("foo"); let task = test_create_logical_table_task("foo");
let yet_another_task = test_create_logical_table_task("bar"); let yet_another_task = test_create_logical_table_task("bar");
let mut procedure = CreateLogicalTablesProcedure::new( let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task, yet_another_task], vec![task, yet_another_task],
physical_table_id, physical_table_id,
ddl_context, ddl_context,
@@ -254,6 +279,7 @@ async fn test_on_create_metadata() {
async fn test_on_create_metadata_part_logical_tables_exist() { async fn test_on_create_metadata_part_logical_tables_exist() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata. // Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table"); let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata { let TableMetadata {
@@ -262,7 +288,10 @@ async fn test_on_create_metadata_part_logical_tables_exist() {
.. ..
} = ddl_context } = ddl_context
.table_metadata_allocator .table_metadata_allocator
.create(&create_physical_table_task) .create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await .await
.unwrap(); .unwrap();
create_physical_table_task.set_table_id(table_id); create_physical_table_task.set_table_id(table_id);
@@ -289,6 +318,7 @@ async fn test_on_create_metadata_part_logical_tables_exist() {
task.create_table.create_if_not_exists = true; task.create_table.create_if_not_exists = true;
let non_exist_task = test_create_logical_table_task("non_exists"); let non_exist_task = test_create_logical_table_task("non_exists");
let mut procedure = CreateLogicalTablesProcedure::new( let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task, non_exist_task], vec![task, non_exist_task],
physical_table_id, physical_table_id,
ddl_context, ddl_context,
@@ -310,6 +340,7 @@ async fn test_on_create_metadata_part_logical_tables_exist() {
async fn test_on_create_metadata_err() { async fn test_on_create_metadata_err() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata. // Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table"); let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata { let TableMetadata {
@@ -318,7 +349,10 @@ async fn test_on_create_metadata_err() {
.. ..
} = ddl_context } = ddl_context
.table_metadata_allocator .table_metadata_allocator
.create(&create_physical_table_task) .create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await .await
.unwrap(); .unwrap();
create_physical_table_task.set_table_id(table_id); create_physical_table_task.set_table_id(table_id);
@@ -334,6 +368,7 @@ async fn test_on_create_metadata_err() {
let task = test_create_logical_table_task("foo"); let task = test_create_logical_table_task("foo");
let yet_another_task = test_create_logical_table_task("bar"); let yet_another_task = test_create_logical_table_task("bar");
let mut procedure = CreateLogicalTablesProcedure::new( let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task.clone(), yet_another_task], vec![task.clone(), yet_another_task],
physical_table_id, physical_table_id,
ddl_context.clone(), ddl_context.clone(),
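
Besides the constructor calls, the hunks above change how these tests allocate the physical table's metadata: table_metadata_allocator.create now receives a &TableMetadataAllocatorContext { cluster_id } ahead of the task. A sketch of that allocation step as shown above; the elided destructuring is reconstructed from how table_id is used afterwards:

    use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};

    // Prepares physical table metadata.
    let mut create_physical_table_task = test_create_physical_table_task("phy_table");
    let TableMetadata { table_id, .. } = ddl_context
        .table_metadata_allocator
        .create(
            &TableMetadataAllocatorContext { cluster_id },
            &create_physical_table_task,
        )
        .await
        .unwrap();
    create_physical_table_task.set_table_id(table_id);
    // (The tests then register this metadata via create_physical_table_metadata; elided here.)
    let physical_table_id = table_id;
    let tasks = vec![test_create_logical_table_task("foo")];
    let mut procedure =
        CreateLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
    let status = procedure.on_prepare().await.unwrap();
    assert_matches!(status, Status::Executing { persist: true });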

View File

@@ -87,6 +87,7 @@ pub(crate) fn test_create_table_task(name: &str) -> CreateTableTask {
async fn test_on_prepare_table_exists_err() { async fn test_on_prepare_table_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo"); let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists); assert!(!task.create_table.create_if_not_exists);
// Puts a value to table name key. // Puts a value to table name key.
@@ -99,7 +100,7 @@ async fn test_on_prepare_table_exists_err() {
) )
.await .await
.unwrap(); .unwrap();
let mut procedure = CreateTableProcedure::new(task, ddl_context); let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::TableAlreadyExists { .. }); assert_matches!(err, Error::TableAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists); assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
@@ -109,6 +110,7 @@ async fn test_on_prepare_table_exists_err() {
async fn test_on_prepare_with_create_if_table_exists() { async fn test_on_prepare_with_create_if_table_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_table_task("foo"); let mut task = test_create_table_task("foo");
task.create_table.create_if_not_exists = true; task.create_table.create_if_not_exists = true;
task.table_info.ident.table_id = 1024; task.table_info.ident.table_id = 1024;
@@ -122,7 +124,7 @@ async fn test_on_prepare_with_create_if_table_exists() {
) )
.await .await
.unwrap(); .unwrap();
let mut procedure = CreateTableProcedure::new(task, ddl_context); let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
let status = procedure.on_prepare().await.unwrap(); let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Done { output: Some(..) }); assert_matches!(status, Status::Done { output: Some(..) });
let table_id = *status.downcast_output_ref::<u32>().unwrap(); let table_id = *status.downcast_output_ref::<u32>().unwrap();
@@ -133,9 +135,10 @@ async fn test_on_prepare_with_create_if_table_exists() {
async fn test_on_prepare_without_create_if_table_exists() { async fn test_on_prepare_without_create_if_table_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_table_task("foo"); let mut task = test_create_table_task("foo");
task.create_table.create_if_not_exists = true; task.create_table.create_if_not_exists = true;
let mut procedure = CreateTableProcedure::new(task, ddl_context); let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
let status = procedure.on_prepare().await.unwrap(); let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true }); assert_matches!(status, Status::Executing { persist: true });
assert_eq!(procedure.table_id(), 1024); assert_eq!(procedure.table_id(), 1024);
@@ -145,10 +148,11 @@ async fn test_on_prepare_without_create_if_table_exists() {
async fn test_on_prepare_with_no_partition_err() { async fn test_on_prepare_with_no_partition_err() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_table_task("foo"); let mut task = test_create_table_task("foo");
task.partitions = vec![]; task.partitions = vec![];
task.create_table.create_if_not_exists = true; task.create_table.create_if_not_exists = true;
let mut procedure = CreateTableProcedure::new(task, ddl_context); let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::Unexpected { .. }); assert_matches!(err, Error::Unexpected { .. });
assert!(err assert!(err
@@ -161,9 +165,10 @@ async fn test_on_datanode_create_regions_should_retry() {
common_telemetry::init_default_ut_logging(); common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo"); let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists); assert!(!task.create_table.create_if_not_exists);
let mut procedure = CreateTableProcedure::new(task, ddl_context); let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext { let ctx = ProcedureContext {
procedure_id: ProcedureId::random(), procedure_id: ProcedureId::random(),
@@ -178,9 +183,10 @@ async fn test_on_datanode_create_regions_should_not_retry() {
common_telemetry::init_default_ut_logging(); common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(UnexpectedErrorDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(UnexpectedErrorDatanodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo"); let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists); assert!(!task.create_table.create_if_not_exists);
let mut procedure = CreateTableProcedure::new(task, ddl_context); let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext { let ctx = ProcedureContext {
procedure_id: ProcedureId::random(), procedure_id: ProcedureId::random(),
@@ -195,9 +201,10 @@ async fn test_on_create_metadata_error() {
common_telemetry::init_default_ut_logging(); common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo"); let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists); assert!(!task.create_table.create_if_not_exists);
let mut procedure = CreateTableProcedure::new(task.clone(), ddl_context.clone()); let mut procedure = CreateTableProcedure::new(cluster_id, task.clone(), ddl_context.clone());
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext { let ctx = ProcedureContext {
procedure_id: ProcedureId::random(), procedure_id: ProcedureId::random(),
@@ -226,9 +233,10 @@ async fn test_on_create_metadata() {
common_telemetry::init_default_ut_logging(); common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo"); let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists); assert!(!task.create_table.create_if_not_exists);
let mut procedure = CreateTableProcedure::new(task, ddl_context); let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext { let ctx = ProcedureContext {
procedure_id: ProcedureId::random(), procedure_id: ProcedureId::random(),
@@ -243,12 +251,14 @@ async fn test_on_create_metadata() {
#[tokio::test] #[tokio::test]
async fn test_memory_region_keeper_guard_dropped_on_procedure_done() { async fn test_memory_region_keeper_guard_dropped_on_procedure_done() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new()); let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend); let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
let task = test_create_table_task("foo"); let task = test_create_table_task("foo");
let mut procedure = CreateTableProcedure::new(task, ddl_context.clone()); let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until(&mut procedure, |p| { execute_procedure_until(&mut procedure, |p| {
p.creator.data.state == CreateTableState::CreateMetadata p.creator.data.state == CreateTableState::CreateMetadata
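
The last hunk above drives the procedure only part-way: execute_procedure_until runs it until the creator reaches CreateTableState::CreateMetadata, which is how the memory-region-keeper guard is checked. A sketch of that pattern; the closing of the call and the assertions that follow are cut off in this view, so the trailing .await is an assumption:

    let cluster_id = 1;
    let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
    let kv_backend = Arc::new(MemoryKvBackend::new());
    let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
    let task = test_create_table_task("foo");
    let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context.clone());
    // Drive the procedure until it is about to create metadata.
    execute_procedure_until(&mut procedure, |p| {
        p.creator.data.state == CreateTableState::CreateMetadata
    })
    .await;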

View File

@@ -97,6 +97,7 @@ pub(crate) fn test_create_view_task(name: &str) -> CreateViewTask {
async fn test_on_prepare_view_exists_err() { async fn test_on_prepare_view_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_view_task("foo"); let task = test_create_view_task("foo");
assert!(!task.create_view.create_if_not_exists); assert!(!task.create_view.create_if_not_exists);
// Puts a value to table name key. // Puts a value to table name key.
@@ -112,7 +113,7 @@ async fn test_on_prepare_view_exists_err() {
) )
.await .await
.unwrap(); .unwrap();
let mut procedure = CreateViewProcedure::new(task, ddl_context); let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::ViewAlreadyExists { .. }); assert_matches!(err, Error::ViewAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists); assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
@@ -122,6 +123,7 @@ async fn test_on_prepare_view_exists_err() {
async fn test_on_prepare_with_create_if_view_exists() { async fn test_on_prepare_with_create_if_view_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_view_task("foo"); let mut task = test_create_view_task("foo");
task.create_view.create_if_not_exists = true; task.create_view.create_if_not_exists = true;
task.view_info.ident.table_id = 1024; task.view_info.ident.table_id = 1024;
@@ -138,7 +140,7 @@ async fn test_on_prepare_with_create_if_view_exists() {
) )
.await .await
.unwrap(); .unwrap();
let mut procedure = CreateViewProcedure::new(task, ddl_context); let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context);
let status = procedure.on_prepare().await.unwrap(); let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Done { output: Some(..) }); assert_matches!(status, Status::Done { output: Some(..) });
let table_id = *status.downcast_output_ref::<u32>().unwrap(); let table_id = *status.downcast_output_ref::<u32>().unwrap();
@@ -149,9 +151,10 @@ async fn test_on_prepare_with_create_if_view_exists() {
async fn test_on_prepare_without_create_if_table_exists() { async fn test_on_prepare_without_create_if_table_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_view_task("foo"); let mut task = test_create_view_task("foo");
task.create_view.create_if_not_exists = true; task.create_view.create_if_not_exists = true;
let mut procedure = CreateViewProcedure::new(task, ddl_context); let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context);
let status = procedure.on_prepare().await.unwrap(); let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true }); assert_matches!(status, Status::Executing { persist: true });
assert_eq!(procedure.view_id(), 1024); assert_eq!(procedure.view_id(), 1024);
@@ -162,9 +165,10 @@ async fn test_on_create_metadata() {
common_telemetry::init_default_ut_logging(); common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_view_task("foo"); let task = test_create_view_task("foo");
assert!(!task.create_view.create_if_not_exists); assert!(!task.create_view.create_if_not_exists);
let mut procedure = CreateViewProcedure::new(task, ddl_context); let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext { let ctx = ProcedureContext {
procedure_id: ProcedureId::random(), procedure_id: ProcedureId::random(),
@@ -181,9 +185,10 @@ async fn test_replace_view_metadata() {
common_telemetry::init_default_ut_logging(); common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager.clone()); let ddl_context = new_ddl_context(node_manager.clone());
let cluster_id = 1;
let task = test_create_view_task("foo"); let task = test_create_view_task("foo");
assert!(!task.create_view.create_if_not_exists); assert!(!task.create_view.create_if_not_exists);
let mut procedure = CreateViewProcedure::new(task.clone(), ddl_context.clone()); let mut procedure = CreateViewProcedure::new(cluster_id, task.clone(), ddl_context.clone());
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext { let ctx = ProcedureContext {
procedure_id: ProcedureId::random(), procedure_id: ProcedureId::random(),
@@ -208,7 +213,7 @@ async fn test_replace_view_metadata() {
let mut task = test_create_view_task("foo"); let mut task = test_create_view_task("foo");
// The view already exists, prepare should fail // The view already exists, prepare should fail
{ {
let mut procedure = CreateViewProcedure::new(task.clone(), ddl_context.clone()); let mut procedure = CreateViewProcedure::new(cluster_id, task.clone(), ddl_context.clone());
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::ViewAlreadyExists { .. }); assert_matches!(err, Error::ViewAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists); assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
@@ -219,7 +224,7 @@ async fn test_replace_view_metadata() {
task.create_view.logical_plan = vec![4, 5, 6]; task.create_view.logical_plan = vec![4, 5, 6];
task.create_view.definition = "new_definition".to_string(); task.create_view.definition = "new_definition".to_string();
let mut procedure = CreateViewProcedure::new(task, ddl_context.clone()); let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext { let ctx = ProcedureContext {
procedure_id: ProcedureId::random(), procedure_id: ProcedureId::random(),
@@ -249,11 +254,12 @@ async fn test_replace_table() {
common_telemetry::init_default_ut_logging(); common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager.clone()); let ddl_context = new_ddl_context(node_manager.clone());
let cluster_id = 1;
{ {
// Create a `foo` table. // Create a `foo` table.
let task = test_create_table_task("foo"); let task = test_create_table_task("foo");
let mut procedure = CreateTableProcedure::new(task, ddl_context.clone()); let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext { let ctx = ProcedureContext {
procedure_id: ProcedureId::random(), procedure_id: ProcedureId::random(),
@@ -266,7 +272,7 @@ async fn test_replace_table() {
// Try to replace a view named `foo` too. // Try to replace a view named `foo` too.
let mut task = test_create_view_task("foo"); let mut task = test_create_view_task("foo");
task.create_view.or_replace = true; task.create_view.or_replace = true;
let mut procedure = CreateViewProcedure::new(task.clone(), ddl_context.clone()); let mut procedure = CreateViewProcedure::new(cluster_id, task.clone(), ddl_context.clone());
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::TableAlreadyExists { .. }); assert_matches!(err, Error::TableAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists); assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
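
The view tests above also exercise OR REPLACE: re-preparing the same view fails with ViewAlreadyExists, while setting or_replace lets a new logical plan and definition go through. A condensed sketch of that sequence, using only calls visible in the hunks and assuming the same setup (cluster_id, ddl_context, an existing "foo" view); any other fields the elided lines set are not visible in this view:

    let mut task = test_create_view_task("foo");
    {
        // The view already exists, prepare should fail.
        let mut procedure = CreateViewProcedure::new(cluster_id, task.clone(), ddl_context.clone());
        let err = procedure.on_prepare().await.unwrap_err();
        assert_matches!(err, Error::ViewAlreadyExists { .. });
    }
    // With `or_replace` set, the new logical plan and definition are accepted.
    task.create_view.or_replace = true;
    task.create_view.logical_plan = vec![4, 5, 6];
    task.create_view.definition = "new_definition".to_string();
    let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context.clone());
    procedure.on_prepare().await.unwrap();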

View File

@@ -31,6 +31,7 @@ use crate::test_util::{new_ddl_context, MockDatanodeManager};
#[tokio::test] #[tokio::test]
async fn test_drop_database_with_logical_tables() { async fn test_drop_database_with_logical_tables() {
common_telemetry::init_default_ut_logging(); common_telemetry::init_default_ut_logging();
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
ddl_context ddl_context
@@ -44,11 +45,11 @@ async fn test_drop_database_with_logical_tables() {
.await .await
.unwrap(); .unwrap();
// Creates physical table // Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await; let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables // Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
create_logical_table(ddl_context.clone(), phy_id, "table3").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table3").await;
let mut procedure = DropDatabaseProcedure::new( let mut procedure = DropDatabaseProcedure::new(
DEFAULT_CATALOG_NAME.to_string(), DEFAULT_CATALOG_NAME.to_string(),
@@ -79,6 +80,7 @@ async fn test_drop_database_with_logical_tables() {
#[tokio::test] #[tokio::test]
async fn test_drop_database_retryable_error() { async fn test_drop_database_retryable_error() {
common_telemetry::init_default_ut_logging(); common_telemetry::init_default_ut_logging();
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
ddl_context ddl_context
@@ -92,11 +94,11 @@ async fn test_drop_database_retryable_error() {
.await .await
.unwrap(); .unwrap();
// Creates physical table // Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await; let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables // Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
create_logical_table(ddl_context.clone(), phy_id, "table3").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table3").await;
let mut procedure = DropDatabaseProcedure::new( let mut procedure = DropDatabaseProcedure::new(
DEFAULT_CATALOG_NAME.to_string(), DEFAULT_CATALOG_NAME.to_string(),
@@ -126,6 +128,7 @@ async fn test_drop_database_retryable_error() {
#[tokio::test] #[tokio::test]
async fn test_drop_database_recover() { async fn test_drop_database_recover() {
common_telemetry::init_default_ut_logging(); common_telemetry::init_default_ut_logging();
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
ddl_context ddl_context
@@ -139,9 +142,9 @@ async fn test_drop_database_recover() {
.await .await
.unwrap(); .unwrap();
// Creates a physical table // Creates a physical table
let phy_id = create_physical_table(&ddl_context, "phy").await; let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates a logical table // Creates a logical table
create_logical_table(ddl_context.clone(), phy_id, "table1").await; create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
let mut procedure = DropDatabaseProcedure::new( let mut procedure = DropDatabaseProcedure::new(
DEFAULT_CATALOG_NAME.to_string(), DEFAULT_CATALOG_NAME.to_string(),
DEFAULT_SCHEMA_NAME.to_string(), DEFAULT_SCHEMA_NAME.to_string(),
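
These drop-database tests build their fixtures through the shared helpers, which now also take the cluster id: create_physical_table returns the physical table id and create_logical_table attaches logical tables to it. A sketch of the fixture setup shown above; the schema registration and the remaining DropDatabaseProcedure arguments are cut off in this view and omitted here:

    let cluster_id = 1;
    let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
    let ddl_context = new_ddl_context(node_manager);
    // Creates a physical table, then hangs three logical tables off it.
    let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
    create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
    create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
    create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table3").await;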

View File

@@ -40,11 +40,12 @@ fn test_drop_flow_task(flow_name: &str, flow_id: u32, drop_if_exists: bool) -> D
#[tokio::test] #[tokio::test]
async fn test_drop_flow_not_found() { async fn test_drop_flow_not_found() {
let cluster_id = 1;
let flow_id = 1024; let flow_id = 1024;
let node_manager = Arc::new(MockFlownodeManager::new(NaiveFlownodeHandler)); let node_manager = Arc::new(MockFlownodeManager::new(NaiveFlownodeHandler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let task = test_drop_flow_task("my_flow", flow_id, false); let task = test_drop_flow_task("my_flow", flow_id, false);
let mut procedure = DropFlowProcedure::new(task, ddl_context); let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, error::Error::FlowNotFound { .. }); assert_matches!(err, error::Error::FlowNotFound { .. });
} }
@@ -52,6 +53,7 @@ async fn test_drop_flow_not_found() {
#[tokio::test] #[tokio::test]
async fn test_drop_flow() { async fn test_drop_flow() {
// create a flow // create a flow
let cluster_id = 1;
let table_id = 1024; let table_id = 1024;
let source_table_names = vec![TableName::new( let source_table_names = vec![TableName::new(
DEFAULT_CATALOG_NAME, DEFAULT_CATALOG_NAME,
@@ -73,21 +75,27 @@ async fn test_drop_flow() {
) )
.await .await
.unwrap(); .unwrap();
let flow_id = let flow_id = create_test_flow(
create_test_flow(&ddl_context, "my_flow", source_table_names, sink_table_name).await; &ddl_context,
cluster_id,
"my_flow",
source_table_names,
sink_table_name,
)
.await;
// Drops the flows // Drops the flows
let task = test_drop_flow_task("my_flow", flow_id, false); let task = test_drop_flow_task("my_flow", flow_id, false);
let mut procedure = DropFlowProcedure::new(task, ddl_context.clone()); let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until_done(&mut procedure).await; execute_procedure_until_done(&mut procedure).await;
// Drops if not exists // Drops if not exists
let task = test_drop_flow_task("my_flow", flow_id, true); let task = test_drop_flow_task("my_flow", flow_id, true);
let mut procedure = DropFlowProcedure::new(task, ddl_context.clone()); let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until_done(&mut procedure).await; execute_procedure_until_done(&mut procedure).await;
// Drops again // Drops again
let task = test_drop_flow_task("my_flow", flow_id, false); let task = test_drop_flow_task("my_flow", flow_id, false);
let mut procedure = DropFlowProcedure::new(task, ddl_context); let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, error::Error::FlowNotFound { .. }); assert_matches!(err, error::Error::FlowNotFound { .. });
} }
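
The drop-flow test above walks through three cases in order: a plain drop succeeds, a second drop with the drop-if-exists flag set is a no-op, and a third plain drop fails with FlowNotFound at prepare time. The cluster_id-threaded variant of that sequence, de-interleaved from the paired lines above, reads as:

    // Drops the flow.
    let task = test_drop_flow_task("my_flow", flow_id, false);
    let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context.clone());
    execute_procedure_until_done(&mut procedure).await;

    // Dropping again with the drop-if-exists flag set does not error.
    let task = test_drop_flow_task("my_flow", flow_id, true);
    let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context.clone());
    execute_procedure_until_done(&mut procedure).await;

    // A third, plain drop reports FlowNotFound at prepare time.
    let task = test_drop_flow_task("my_flow", flow_id, false);
    let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context);
    let err = procedure.on_prepare().await.unwrap_err();
    assert_matches!(err, error::Error::FlowNotFound { .. });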

View File

@@ -35,7 +35,7 @@ use crate::ddl::test_util::{
create_logical_table, create_physical_table, create_physical_table_metadata, create_logical_table, create_physical_table, create_physical_table_metadata,
test_create_logical_table_task, test_create_physical_table_task, test_create_logical_table_task, test_create_physical_table_task,
}; };
use crate::ddl::TableMetadata; use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
use crate::key::table_route::TableRouteValue; use crate::key::table_route::TableRouteValue;
use crate::kv_backend::memory::MemoryKvBackend; use crate::kv_backend::memory::MemoryKvBackend;
use crate::peer::Peer; use crate::peer::Peer;
@@ -47,6 +47,7 @@ use crate::test_util::{new_ddl_context, new_ddl_context_with_kv_backend, MockDat
async fn test_on_prepare_table_not_exists_err() { async fn test_on_prepare_table_not_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo"; let table_name = "foo";
let table_id = 1024; let table_id = 1024;
let task = test_create_table_task(table_name, table_id); let task = test_create_table_task(table_name, table_id);
@@ -62,7 +63,7 @@ async fn test_on_prepare_table_not_exists_err() {
.unwrap(); .unwrap();
let task = new_drop_table_task("bar", table_id, false); let task = new_drop_table_task("bar", table_id, false);
let mut procedure = DropTableProcedure::new(task, ddl_context); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_eq!(err.status_code(), StatusCode::TableNotFound); assert_eq!(err.status_code(), StatusCode::TableNotFound);
} }
@@ -71,6 +72,7 @@ async fn test_on_prepare_table_not_exists_err() {
async fn test_on_prepare_table() { async fn test_on_prepare_table() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo"; let table_name = "foo";
let table_id = 1024; let table_id = 1024;
let task = test_create_table_task(table_name, table_id); let task = test_create_table_task(table_name, table_id);
@@ -87,13 +89,13 @@ async fn test_on_prepare_table() {
let task = new_drop_table_task("bar", table_id, true); let task = new_drop_table_task("bar", table_id, true);
// Drop if exists // Drop if exists
let mut procedure = DropTableProcedure::new(task, ddl_context.clone()); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
assert!(!procedure.rollback_supported()); assert!(!procedure.rollback_supported());
let task = new_drop_table_task(table_name, table_id, false); let task = new_drop_table_task(table_name, table_id, false);
// Drop table // Drop table
let mut procedure = DropTableProcedure::new(task, ddl_context); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
} }
@@ -103,6 +105,7 @@ async fn test_on_datanode_drop_regions() {
let datanode_handler = DatanodeWatcher(tx); let datanode_handler = DatanodeWatcher(tx);
let node_manager = Arc::new(MockDatanodeManager::new(datanode_handler)); let node_manager = Arc::new(MockDatanodeManager::new(datanode_handler));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_id = 1024; let table_id = 1024;
let table_name = "foo"; let table_name = "foo";
let task = test_create_table_task(table_name, table_id); let task = test_create_table_task(table_name, table_id);
@@ -141,7 +144,7 @@ async fn test_on_datanode_drop_regions() {
let task = new_drop_table_task(table_name, table_id, false); let task = new_drop_table_task(table_name, table_id, false);
// Drop table // Drop table
let mut procedure = DropTableProcedure::new(task, ddl_context); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
procedure.on_datanode_drop_regions().await.unwrap(); procedure.on_datanode_drop_regions().await.unwrap();
@@ -176,6 +179,7 @@ async fn test_on_rollback() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new()); let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend.clone()); let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend.clone());
let cluster_id = 1;
// Prepares physical table metadata. // Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table"); let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata { let TableMetadata {
@@ -184,7 +188,10 @@ async fn test_on_rollback() {
.. ..
} = ddl_context } = ddl_context
.table_metadata_allocator .table_metadata_allocator
.create(&create_physical_table_task) .create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await .await
.unwrap(); .unwrap();
create_physical_table_task.set_table_id(table_id); create_physical_table_task.set_table_id(table_id);
@@ -198,8 +205,12 @@ async fn test_on_rollback() {
let physical_table_id = table_id; let physical_table_id = table_id;
// Creates the logical table metadata. // Creates the logical table metadata.
let task = test_create_logical_table_task("foo"); let task = test_create_logical_table_task("foo");
let mut procedure = let mut procedure = CreateLogicalTablesProcedure::new(
CreateLogicalTablesProcedure::new(vec![task], physical_table_id, ddl_context.clone()); cluster_id,
vec![task],
physical_table_id,
ddl_context.clone(),
);
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
let ctx = new_test_procedure_context(); let ctx = new_test_procedure_context();
procedure.execute(&ctx).await.unwrap(); procedure.execute(&ctx).await.unwrap();
@@ -212,7 +223,7 @@ async fn test_on_rollback() {
// Drops the physical table // Drops the physical table
{ {
let task = new_drop_table_task("phy_table", physical_table_id, false); let task = new_drop_table_task("phy_table", physical_table_id, false);
let mut procedure = DropTableProcedure::new(task, ddl_context.clone()); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
assert!(procedure.rollback_supported()); assert!(procedure.rollback_supported());
procedure.on_delete_metadata().await.unwrap(); procedure.on_delete_metadata().await.unwrap();
@@ -227,7 +238,7 @@ async fn test_on_rollback() {
// Drops the logical table // Drops the logical table
let task = new_drop_table_task("foo", table_ids[0], false); let task = new_drop_table_task("foo", table_ids[0], false);
let mut procedure = DropTableProcedure::new(task, ddl_context.clone()); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
assert!(!procedure.rollback_supported()); assert!(!procedure.rollback_supported());
} }
@@ -244,15 +255,18 @@ fn new_drop_table_task(table_name: &str, table_id: TableId, drop_if_exists: bool
#[tokio::test] #[tokio::test]
async fn test_memory_region_keeper_guard_dropped_on_procedure_done() { async fn test_memory_region_keeper_guard_dropped_on_procedure_done() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new()); let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend); let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
let physical_table_id = create_physical_table(&ddl_context, "t").await; let physical_table_id = create_physical_table(&ddl_context, cluster_id, "t").await;
let logical_table_id = create_logical_table(ddl_context.clone(), physical_table_id, "s").await; let logical_table_id =
create_logical_table(ddl_context.clone(), cluster_id, physical_table_id, "s").await;
let inner_test = |task: DropTableTask| async { let inner_test = |task: DropTableTask| async {
let mut procedure = DropTableProcedure::new(task, ddl_context.clone()); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until(&mut procedure, |p| { execute_procedure_until(&mut procedure, |p| {
p.data.state == DropTableState::InvalidateTableCache p.data.state == DropTableState::InvalidateTableCache
}) })
@@ -290,13 +304,14 @@ async fn test_from_json() {
(DropTableState::DatanodeDropRegions, 1, 1), (DropTableState::DatanodeDropRegions, 1, 1),
(DropTableState::DeleteTombstone, 1, 0), (DropTableState::DeleteTombstone, 1, 0),
] { ] {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new()); let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend); let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
let physical_table_id = create_physical_table(&ddl_context, "t").await; let physical_table_id = create_physical_table(&ddl_context, cluster_id, "t").await;
let task = new_drop_table_task("t", physical_table_id, false); let task = new_drop_table_task("t", physical_table_id, false);
let mut procedure = DropTableProcedure::new(task, ddl_context.clone()); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until(&mut procedure, |p| p.data.state == state).await; execute_procedure_until(&mut procedure, |p| p.data.state == state).await;
let data = procedure.dump().unwrap(); let data = procedure.dump().unwrap();
assert_eq!( assert_eq!(
@@ -319,13 +334,14 @@ async fn test_from_json() {
let num_operating_regions = 0; let num_operating_regions = 0;
let num_operating_regions_after_recovery = 0; let num_operating_regions_after_recovery = 0;
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)); let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new()); let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend); let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
let physical_table_id = create_physical_table(&ddl_context, "t").await; let physical_table_id = create_physical_table(&ddl_context, cluster_id, "t").await;
let task = new_drop_table_task("t", physical_table_id, false); let task = new_drop_table_task("t", physical_table_id, false);
let mut procedure = DropTableProcedure::new(task, ddl_context.clone()); let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until_done(&mut procedure).await; execute_procedure_until_done(&mut procedure).await;
let data = procedure.dump().unwrap(); let data = procedure.dump().unwrap();
assert_eq!( assert_eq!(
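Note: every drop-table test above follows the same pattern after this change: the procedure now takes the cluster id as its first constructor argument. A minimal sketch of the new call shape, reusing only the helpers visible in this diff (MockDatanodeManager, new_ddl_context, new_drop_table_task); this is an illustration, not an additional test:
// Sketch only: cluster_id is now threaded through the procedure explicitly.
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1; // ClusterId is a u64 alias (see the new type alias later in this diff)
let task = new_drop_table_task("foo", 1024, false);
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();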

View File

@@ -41,6 +41,7 @@ fn new_drop_view_task(view: &str, view_id: TableId, drop_if_exists: bool) -> Dro
async fn test_on_prepare_view_not_exists_err() { async fn test_on_prepare_view_not_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let view_id = 1024; let view_id = 1024;
let mut task = test_create_view_task("foo"); let mut task = test_create_view_task("foo");
task.view_info.ident.table_id = view_id; task.view_info.ident.table_id = view_id;
@@ -59,7 +60,7 @@ async fn test_on_prepare_view_not_exists_err() {
.unwrap(); .unwrap();
let task = new_drop_view_task("bar", view_id, false); let task = new_drop_view_task("bar", view_id, false);
let mut procedure = DropViewProcedure::new(task, ddl_context); let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_eq!(err.status_code(), StatusCode::TableNotFound); assert_eq!(err.status_code(), StatusCode::TableNotFound);
} }
@@ -68,6 +69,7 @@ async fn test_on_prepare_view_not_exists_err() {
async fn test_on_prepare_not_view_err() { async fn test_on_prepare_not_view_err() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let view_id = 1024; let view_id = 1024;
let view_name = "foo"; let view_name = "foo";
let task = test_create_table_task(view_name, view_id); let task = test_create_table_task(view_name, view_id);
@@ -83,7 +85,7 @@ async fn test_on_prepare_not_view_err() {
.unwrap(); .unwrap();
let task = new_drop_view_task(view_name, view_id, false); let task = new_drop_view_task(view_name, view_id, false);
let mut procedure = DropViewProcedure::new(task, ddl_context); let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context);
// It's not a view, expect error // It's not a view, expect error
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_eq!(err.status_code(), StatusCode::InvalidArguments); assert_eq!(err.status_code(), StatusCode::InvalidArguments);
@@ -93,6 +95,7 @@ async fn test_on_prepare_not_view_err() {
async fn test_on_prepare_success() { async fn test_on_prepare_success() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let view_id = 1024; let view_id = 1024;
let view_name = "foo"; let view_name = "foo";
let mut task = test_create_view_task("foo"); let mut task = test_create_view_task("foo");
@@ -113,12 +116,12 @@ async fn test_on_prepare_success() {
let task = new_drop_view_task("bar", view_id, true); let task = new_drop_view_task("bar", view_id, true);
// Drop if exists // Drop if exists
let mut procedure = DropViewProcedure::new(task, ddl_context.clone()); let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
let task = new_drop_view_task(view_name, view_id, false); let task = new_drop_view_task(view_name, view_id, false);
// Prepare success // Prepare success
let mut procedure = DropViewProcedure::new(task, ddl_context); let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap(); procedure.on_prepare().await.unwrap();
assert_eq!(DropViewState::DeleteMetadata, procedure.state()); assert_eq!(DropViewState::DeleteMetadata, procedure.state());
} }
@@ -127,6 +130,7 @@ async fn test_on_prepare_success() {
async fn test_drop_view_success() { async fn test_drop_view_success() {
let node_manager = Arc::new(MockDatanodeManager::new(())); let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager); let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let view_id = 1024; let view_id = 1024;
let view_name = "foo"; let view_name = "foo";
let mut task = test_create_view_task("foo"); let mut task = test_create_view_task("foo");
@@ -155,7 +159,7 @@ async fn test_drop_view_success() {
let task = new_drop_view_task(view_name, view_id, false); let task = new_drop_view_task(view_name, view_id, false);
// Prepare success // Prepare success
let mut procedure = DropViewProcedure::new(task, ddl_context.clone()); let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until_done(&mut procedure).await; execute_procedure_until_done(&mut procedure).await;
assert_eq!(DropViewState::InvalidateViewCache, procedure.state()); assert_eq!(DropViewState::InvalidateViewCache, procedure.state());
@@ -170,7 +174,7 @@ async fn test_drop_view_success() {
// Drop again // Drop again
let task = new_drop_view_task(view_name, view_id, false); let task = new_drop_view_task(view_name, view_id, false);
let mut procedure = DropViewProcedure::new(task, ddl_context); let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err(); let err = procedure.on_prepare().await.unwrap_err();
assert_eq!(err.status_code(), StatusCode::TableNotFound); assert_eq!(err.status_code(), StatusCode::TableNotFound);
} }

View File

@@ -39,9 +39,9 @@ use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey; use crate::key::table_name::TableNameKey;
use crate::key::DeserializedValueWithBytes; use crate::key::DeserializedValueWithBytes;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock}; use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::metrics;
use crate::rpc::ddl::TruncateTableTask; use crate::rpc::ddl::TruncateTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute}; use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
use crate::{metrics, ClusterId};
pub struct TruncateTableProcedure { pub struct TruncateTableProcedure {
context: DdlContext, context: DdlContext,
@@ -91,6 +91,7 @@ impl TruncateTableProcedure {
pub(crate) const TYPE_NAME: &'static str = "metasrv-procedure::TruncateTable"; pub(crate) const TYPE_NAME: &'static str = "metasrv-procedure::TruncateTable";
pub(crate) fn new( pub(crate) fn new(
cluster_id: ClusterId,
task: TruncateTableTask, task: TruncateTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>, table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>, region_routes: Vec<RegionRoute>,
@@ -98,7 +99,7 @@ impl TruncateTableProcedure {
) -> Self { ) -> Self {
Self { Self {
context, context,
data: TruncateTableData::new(task, table_info_value, region_routes), data: TruncateTableData::new(cluster_id, task, table_info_value, region_routes),
} }
} }
@@ -188,6 +189,7 @@ impl TruncateTableProcedure {
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
pub struct TruncateTableData { pub struct TruncateTableData {
state: TruncateTableState, state: TruncateTableState,
cluster_id: ClusterId,
task: TruncateTableTask, task: TruncateTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>, table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>, region_routes: Vec<RegionRoute>,
@@ -195,12 +197,14 @@ pub struct TruncateTableData {
impl TruncateTableData { impl TruncateTableData {
pub fn new( pub fn new(
cluster_id: ClusterId,
task: TruncateTableTask, task: TruncateTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>, table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>, region_routes: Vec<RegionRoute>,
) -> Self { ) -> Self {
Self { Self {
state: TruncateTableState::Prepare, state: TruncateTableState::Prepare,
cluster_id,
task, task,
table_info_value, table_info_value,
region_routes, region_routes,
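Because cluster_id is now a field of TruncateTableData (which derives Serialize/Deserialize), it is persisted with the procedure state and restored on recovery, just like the other fields. A rough sketch of the new constructor shape, assuming task, table_info_value and region_routes are prepared from the table metadata lookup as elsewhere in this diff:
// Hypothetical call site; argument order follows the updated signature above.
let procedure = TruncateTableProcedure::new(
    cluster_id,          // now serialized as part of TruncateTableData
    truncate_table_task,
    table_info_value,
    region_routes,
    ddl_context,
);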

View File

@@ -34,6 +34,7 @@ use crate::key::TableMetadataManagerRef;
use crate::peer::Peer; use crate::peer::Peer;
use crate::rpc::ddl::CreateTableTask; use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::RegionRoute; use crate::rpc::router::RegionRoute;
use crate::ClusterId;
/// Adds [Peer] context if the error is unretryable. /// Adds [Peer] context if the error is unretryable.
pub fn add_peer_context_if_needed(datanode: Peer) -> impl FnOnce(Error) -> Error { pub fn add_peer_context_if_needed(datanode: Peer) -> impl FnOnce(Error) -> Error {
@@ -143,6 +144,7 @@ pub async fn get_physical_table_id(
/// Converts a list of [`RegionRoute`] to a list of [`DetectingRegion`]. /// Converts a list of [`RegionRoute`] to a list of [`DetectingRegion`].
pub fn convert_region_routes_to_detecting_regions( pub fn convert_region_routes_to_detecting_regions(
cluster_id: ClusterId,
region_routes: &[RegionRoute], region_routes: &[RegionRoute],
) -> Vec<DetectingRegion> { ) -> Vec<DetectingRegion> {
region_routes region_routes
@@ -151,7 +153,7 @@ pub fn convert_region_routes_to_detecting_regions(
route route
.leader_peer .leader_peer
.as_ref() .as_ref()
.map(|peer| (peer.id, route.region.id)) .map(|peer| (cluster_id, peer.id, route.region.id))
}) })
.collect::<Vec<_>>() .collect::<Vec<_>>()
} }
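With this change a detecting region is keyed by the cluster as well: judging from the closure above, the helper now yields (cluster_id, datanode_id, region_id) triples rather than pairs. A hedged illustration of consuming its output, assuming DetectingRegion is exactly that tuple:
// Assumed shape, inferred from the map() closure in this hunk.
let detecting = convert_region_routes_to_detecting_regions(cluster_id, &region_routes);
for (cluster_id, datanode_id, region_id) in detecting {
    // Each leader region is now tracked per cluster by the failure detector.
    println!("cluster={cluster_id} datanode={datanode_id} region={region_id:?}");
}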

View File

@@ -60,6 +60,7 @@ use crate::rpc::ddl::{
use crate::rpc::procedure; use crate::rpc::procedure;
use crate::rpc::procedure::{MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse}; use crate::rpc::procedure::{MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse};
use crate::rpc::router::RegionRoute; use crate::rpc::router::RegionRoute;
use crate::ClusterId;
pub type DdlManagerRef = Arc<DdlManager>; pub type DdlManagerRef = Arc<DdlManager>;
@@ -153,12 +154,13 @@ impl DdlManager {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn submit_alter_table_task( pub async fn submit_alter_table_task(
&self, &self,
cluster_id: ClusterId,
table_id: TableId, table_id: TableId,
alter_table_task: AlterTableTask, alter_table_task: AlterTableTask,
) -> Result<(ProcedureId, Option<Output>)> { ) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context(); let context = self.create_context();
let procedure = AlterTableProcedure::new(table_id, alter_table_task, context)?; let procedure = AlterTableProcedure::new(cluster_id, table_id, alter_table_task, context)?;
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure)); let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -169,11 +171,12 @@ impl DdlManager {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn submit_create_table_task( pub async fn submit_create_table_task(
&self, &self,
cluster_id: ClusterId,
create_table_task: CreateTableTask, create_table_task: CreateTableTask,
) -> Result<(ProcedureId, Option<Output>)> { ) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context(); let context = self.create_context();
let procedure = CreateTableProcedure::new(create_table_task, context); let procedure = CreateTableProcedure::new(cluster_id, create_table_task, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure)); let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -184,11 +187,12 @@ impl DdlManager {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn submit_create_view_task( pub async fn submit_create_view_task(
&self, &self,
cluster_id: ClusterId,
create_view_task: CreateViewTask, create_view_task: CreateViewTask,
) -> Result<(ProcedureId, Option<Output>)> { ) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context(); let context = self.create_context();
let procedure = CreateViewProcedure::new(create_view_task, context); let procedure = CreateViewProcedure::new(cluster_id, create_view_task, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure)); let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -199,13 +203,18 @@ impl DdlManager {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn submit_create_logical_table_tasks( pub async fn submit_create_logical_table_tasks(
&self, &self,
cluster_id: ClusterId,
create_table_tasks: Vec<CreateTableTask>, create_table_tasks: Vec<CreateTableTask>,
physical_table_id: TableId, physical_table_id: TableId,
) -> Result<(ProcedureId, Option<Output>)> { ) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context(); let context = self.create_context();
let procedure = let procedure = CreateLogicalTablesProcedure::new(
CreateLogicalTablesProcedure::new(create_table_tasks, physical_table_id, context); cluster_id,
create_table_tasks,
physical_table_id,
context,
);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure)); let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -216,13 +225,18 @@ impl DdlManager {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn submit_alter_logical_table_tasks( pub async fn submit_alter_logical_table_tasks(
&self, &self,
cluster_id: ClusterId,
alter_table_tasks: Vec<AlterTableTask>, alter_table_tasks: Vec<AlterTableTask>,
physical_table_id: TableId, physical_table_id: TableId,
) -> Result<(ProcedureId, Option<Output>)> { ) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context(); let context = self.create_context();
let procedure = let procedure = AlterLogicalTablesProcedure::new(
AlterLogicalTablesProcedure::new(alter_table_tasks, physical_table_id, context); cluster_id,
alter_table_tasks,
physical_table_id,
context,
);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure)); let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -233,11 +247,12 @@ impl DdlManager {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn submit_drop_table_task( pub async fn submit_drop_table_task(
&self, &self,
cluster_id: ClusterId,
drop_table_task: DropTableTask, drop_table_task: DropTableTask,
) -> Result<(ProcedureId, Option<Output>)> { ) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context(); let context = self.create_context();
let procedure = DropTableProcedure::new(drop_table_task, context); let procedure = DropTableProcedure::new(cluster_id, drop_table_task, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure)); let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -248,6 +263,7 @@ impl DdlManager {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn submit_create_database( pub async fn submit_create_database(
&self, &self,
_cluster_id: ClusterId,
CreateDatabaseTask { CreateDatabaseTask {
catalog, catalog,
schema, schema,
@@ -267,6 +283,7 @@ impl DdlManager {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn submit_drop_database( pub async fn submit_drop_database(
&self, &self,
_cluster_id: ClusterId,
DropDatabaseTask { DropDatabaseTask {
catalog, catalog,
schema, schema,
@@ -282,10 +299,11 @@ impl DdlManager {
pub async fn submit_alter_database( pub async fn submit_alter_database(
&self, &self,
cluster_id: ClusterId,
alter_database_task: AlterDatabaseTask, alter_database_task: AlterDatabaseTask,
) -> Result<(ProcedureId, Option<Output>)> { ) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context(); let context = self.create_context();
let procedure = AlterDatabaseProcedure::new(alter_database_task, context)?; let procedure = AlterDatabaseProcedure::new(cluster_id, alter_database_task, context)?;
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure)); let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
self.submit_procedure(procedure_with_id).await self.submit_procedure(procedure_with_id).await
@@ -295,11 +313,12 @@ impl DdlManager {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn submit_create_flow_task( pub async fn submit_create_flow_task(
&self, &self,
cluster_id: ClusterId,
create_flow: CreateFlowTask, create_flow: CreateFlowTask,
query_context: QueryContext, query_context: QueryContext,
) -> Result<(ProcedureId, Option<Output>)> { ) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context(); let context = self.create_context();
let procedure = CreateFlowProcedure::new(create_flow, query_context, context); let procedure = CreateFlowProcedure::new(cluster_id, create_flow, query_context, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure)); let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
self.submit_procedure(procedure_with_id).await self.submit_procedure(procedure_with_id).await
@@ -309,10 +328,11 @@ impl DdlManager {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn submit_drop_flow_task( pub async fn submit_drop_flow_task(
&self, &self,
cluster_id: ClusterId,
drop_flow: DropFlowTask, drop_flow: DropFlowTask,
) -> Result<(ProcedureId, Option<Output>)> { ) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context(); let context = self.create_context();
let procedure = DropFlowProcedure::new(drop_flow, context); let procedure = DropFlowProcedure::new(cluster_id, drop_flow, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure)); let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
self.submit_procedure(procedure_with_id).await self.submit_procedure(procedure_with_id).await
@@ -322,10 +342,11 @@ impl DdlManager {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn submit_drop_view_task( pub async fn submit_drop_view_task(
&self, &self,
cluster_id: ClusterId,
drop_view: DropViewTask, drop_view: DropViewTask,
) -> Result<(ProcedureId, Option<Output>)> { ) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context(); let context = self.create_context();
let procedure = DropViewProcedure::new(drop_view, context); let procedure = DropViewProcedure::new(cluster_id, drop_view, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure)); let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
self.submit_procedure(procedure_with_id).await self.submit_procedure(procedure_with_id).await
@@ -335,12 +356,14 @@ impl DdlManager {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn submit_truncate_table_task( pub async fn submit_truncate_table_task(
&self, &self,
cluster_id: ClusterId,
truncate_table_task: TruncateTableTask, truncate_table_task: TruncateTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>, table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>, region_routes: Vec<RegionRoute>,
) -> Result<(ProcedureId, Option<Output>)> { ) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context(); let context = self.create_context();
let procedure = TruncateTableProcedure::new( let procedure = TruncateTableProcedure::new(
cluster_id,
truncate_table_task, truncate_table_task,
table_info_value, table_info_value,
region_routes, region_routes,
@@ -374,6 +397,7 @@ impl DdlManager {
async fn handle_truncate_table_task( async fn handle_truncate_table_task(
ddl_manager: &DdlManager, ddl_manager: &DdlManager,
cluster_id: ClusterId,
truncate_table_task: TruncateTableTask, truncate_table_task: TruncateTableTask,
) -> Result<SubmitDdlTaskResponse> { ) -> Result<SubmitDdlTaskResponse> {
let table_id = truncate_table_task.table_id; let table_id = truncate_table_task.table_id;
@@ -392,7 +416,12 @@ async fn handle_truncate_table_task(
let table_route = table_route_value.into_inner().region_routes()?.clone(); let table_route = table_route_value.into_inner().region_routes()?.clone();
let (id, _) = ddl_manager let (id, _) = ddl_manager
.submit_truncate_table_task(truncate_table_task, table_info_value, table_route) .submit_truncate_table_task(
cluster_id,
truncate_table_task,
table_info_value,
table_route,
)
.await?; .await?;
info!("Table: {table_id} is truncated via procedure_id {id:?}"); info!("Table: {table_id} is truncated via procedure_id {id:?}");
@@ -405,6 +434,7 @@ async fn handle_truncate_table_task(
async fn handle_alter_table_task( async fn handle_alter_table_task(
ddl_manager: &DdlManager, ddl_manager: &DdlManager,
cluster_id: ClusterId,
alter_table_task: AlterTableTask, alter_table_task: AlterTableTask,
) -> Result<SubmitDdlTaskResponse> { ) -> Result<SubmitDdlTaskResponse> {
let table_ref = alter_table_task.table_ref(); let table_ref = alter_table_task.table_ref();
@@ -438,7 +468,7 @@ async fn handle_alter_table_task(
); );
let (id, _) = ddl_manager let (id, _) = ddl_manager
.submit_alter_table_task(table_id, alter_table_task) .submit_alter_table_task(cluster_id, table_id, alter_table_task)
.await?; .await?;
info!("Table: {table_id} is altered via procedure_id {id:?}"); info!("Table: {table_id} is altered via procedure_id {id:?}");
@@ -451,10 +481,13 @@ async fn handle_alter_table_task(
async fn handle_drop_table_task( async fn handle_drop_table_task(
ddl_manager: &DdlManager, ddl_manager: &DdlManager,
cluster_id: ClusterId,
drop_table_task: DropTableTask, drop_table_task: DropTableTask,
) -> Result<SubmitDdlTaskResponse> { ) -> Result<SubmitDdlTaskResponse> {
let table_id = drop_table_task.table_id; let table_id = drop_table_task.table_id;
let (id, _) = ddl_manager.submit_drop_table_task(drop_table_task).await?; let (id, _) = ddl_manager
.submit_drop_table_task(cluster_id, drop_table_task)
.await?;
info!("Table: {table_id} is dropped via procedure_id {id:?}"); info!("Table: {table_id} is dropped via procedure_id {id:?}");
@@ -466,10 +499,11 @@ async fn handle_drop_table_task(
async fn handle_create_table_task( async fn handle_create_table_task(
ddl_manager: &DdlManager, ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_table_task: CreateTableTask, create_table_task: CreateTableTask,
) -> Result<SubmitDdlTaskResponse> { ) -> Result<SubmitDdlTaskResponse> {
let (id, output) = ddl_manager let (id, output) = ddl_manager
.submit_create_table_task(create_table_task) .submit_create_table_task(cluster_id, create_table_task)
.await?; .await?;
let procedure_id = id.to_string(); let procedure_id = id.to_string();
@@ -491,6 +525,7 @@ async fn handle_create_table_task(
async fn handle_create_logical_table_tasks( async fn handle_create_logical_table_tasks(
ddl_manager: &DdlManager, ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_table_tasks: Vec<CreateTableTask>, create_table_tasks: Vec<CreateTableTask>,
) -> Result<SubmitDdlTaskResponse> { ) -> Result<SubmitDdlTaskResponse> {
ensure!( ensure!(
@@ -507,7 +542,7 @@ async fn handle_create_logical_table_tasks(
let num_logical_tables = create_table_tasks.len(); let num_logical_tables = create_table_tasks.len();
let (id, output) = ddl_manager let (id, output) = ddl_manager
.submit_create_logical_table_tasks(create_table_tasks, physical_table_id) .submit_create_logical_table_tasks(cluster_id, create_table_tasks, physical_table_id)
.await?; .await?;
info!("{num_logical_tables} logical tables on physical table: {physical_table_id:?} is created via procedure_id {id:?}"); info!("{num_logical_tables} logical tables on physical table: {physical_table_id:?} is created via procedure_id {id:?}");
@@ -533,10 +568,11 @@ async fn handle_create_logical_table_tasks(
async fn handle_create_database_task( async fn handle_create_database_task(
ddl_manager: &DdlManager, ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_database_task: CreateDatabaseTask, create_database_task: CreateDatabaseTask,
) -> Result<SubmitDdlTaskResponse> { ) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager let (id, _) = ddl_manager
.submit_create_database(create_database_task.clone()) .submit_create_database(cluster_id, create_database_task.clone())
.await?; .await?;
let procedure_id = id.to_string(); let procedure_id = id.to_string();
@@ -553,10 +589,11 @@ async fn handle_create_database_task(
async fn handle_drop_database_task( async fn handle_drop_database_task(
ddl_manager: &DdlManager, ddl_manager: &DdlManager,
cluster_id: ClusterId,
drop_database_task: DropDatabaseTask, drop_database_task: DropDatabaseTask,
) -> Result<SubmitDdlTaskResponse> { ) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager let (id, _) = ddl_manager
.submit_drop_database(drop_database_task.clone()) .submit_drop_database(cluster_id, drop_database_task.clone())
.await?; .await?;
let procedure_id = id.to_string(); let procedure_id = id.to_string();
@@ -573,10 +610,11 @@ async fn handle_drop_database_task(
async fn handle_alter_database_task( async fn handle_alter_database_task(
ddl_manager: &DdlManager, ddl_manager: &DdlManager,
cluster_id: ClusterId,
alter_database_task: AlterDatabaseTask, alter_database_task: AlterDatabaseTask,
) -> Result<SubmitDdlTaskResponse> { ) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager let (id, _) = ddl_manager
.submit_alter_database(alter_database_task.clone()) .submit_alter_database(cluster_id, alter_database_task.clone())
.await?; .await?;
let procedure_id = id.to_string(); let procedure_id = id.to_string();
@@ -594,10 +632,11 @@ async fn handle_alter_database_task(
async fn handle_drop_flow_task( async fn handle_drop_flow_task(
ddl_manager: &DdlManager, ddl_manager: &DdlManager,
cluster_id: ClusterId,
drop_flow_task: DropFlowTask, drop_flow_task: DropFlowTask,
) -> Result<SubmitDdlTaskResponse> { ) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager let (id, _) = ddl_manager
.submit_drop_flow_task(drop_flow_task.clone()) .submit_drop_flow_task(cluster_id, drop_flow_task.clone())
.await?; .await?;
let procedure_id = id.to_string(); let procedure_id = id.to_string();
@@ -614,10 +653,11 @@ async fn handle_drop_flow_task(
async fn handle_drop_view_task( async fn handle_drop_view_task(
ddl_manager: &DdlManager, ddl_manager: &DdlManager,
cluster_id: ClusterId,
drop_view_task: DropViewTask, drop_view_task: DropViewTask,
) -> Result<SubmitDdlTaskResponse> { ) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager let (id, _) = ddl_manager
.submit_drop_view_task(drop_view_task.clone()) .submit_drop_view_task(cluster_id, drop_view_task.clone())
.await?; .await?;
let procedure_id = id.to_string(); let procedure_id = id.to_string();
@@ -635,11 +675,12 @@ async fn handle_drop_view_task(
async fn handle_create_flow_task( async fn handle_create_flow_task(
ddl_manager: &DdlManager, ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_flow_task: CreateFlowTask, create_flow_task: CreateFlowTask,
query_context: QueryContext, query_context: QueryContext,
) -> Result<SubmitDdlTaskResponse> { ) -> Result<SubmitDdlTaskResponse> {
let (id, output) = ddl_manager let (id, output) = ddl_manager
.submit_create_flow_task(create_flow_task.clone(), query_context) .submit_create_flow_task(cluster_id, create_flow_task.clone(), query_context)
.await?; .await?;
let procedure_id = id.to_string(); let procedure_id = id.to_string();
@@ -671,6 +712,7 @@ async fn handle_create_flow_task(
async fn handle_alter_logical_table_tasks( async fn handle_alter_logical_table_tasks(
ddl_manager: &DdlManager, ddl_manager: &DdlManager,
cluster_id: ClusterId,
alter_table_tasks: Vec<AlterTableTask>, alter_table_tasks: Vec<AlterTableTask>,
) -> Result<SubmitDdlTaskResponse> { ) -> Result<SubmitDdlTaskResponse> {
ensure!( ensure!(
@@ -691,7 +733,7 @@ async fn handle_alter_logical_table_tasks(
let num_logical_tables = alter_table_tasks.len(); let num_logical_tables = alter_table_tasks.len();
let (id, _) = ddl_manager let (id, _) = ddl_manager
.submit_alter_logical_table_tasks(alter_table_tasks, physical_table_id) .submit_alter_logical_table_tasks(cluster_id, alter_table_tasks, physical_table_id)
.await?; .await?;
info!("{num_logical_tables} logical tables on physical table: {physical_table_id:?} is altered via procedure_id {id:?}"); info!("{num_logical_tables} logical tables on physical table: {physical_table_id:?} is altered via procedure_id {id:?}");
@@ -707,10 +749,11 @@ async fn handle_alter_logical_table_tasks(
/// Handle the `[CreateViewTask]` and returns the DDL response when success. /// Handle the `[CreateViewTask]` and returns the DDL response when success.
async fn handle_create_view_task( async fn handle_create_view_task(
ddl_manager: &DdlManager, ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_view_task: CreateViewTask, create_view_task: CreateViewTask,
) -> Result<SubmitDdlTaskResponse> { ) -> Result<SubmitDdlTaskResponse> {
let (id, output) = ddl_manager let (id, output) = ddl_manager
.submit_create_view_task(create_view_task) .submit_create_view_task(cluster_id, create_view_task)
.await?; .await?;
let procedure_id = id.to_string(); let procedure_id = id.to_string();
@@ -745,43 +788,55 @@ impl ProcedureExecutor for DdlManager {
.unwrap_or(TracingContext::from_current_span()) .unwrap_or(TracingContext::from_current_span())
.attach(tracing::info_span!("DdlManager::submit_ddl_task")); .attach(tracing::info_span!("DdlManager::submit_ddl_task"));
async move { async move {
let cluster_id = ctx.cluster_id.unwrap_or_default();
debug!("Submitting Ddl task: {:?}", request.task); debug!("Submitting Ddl task: {:?}", request.task);
match request.task { match request.task {
CreateTable(create_table_task) => { CreateTable(create_table_task) => {
handle_create_table_task(self, create_table_task).await handle_create_table_task(self, cluster_id, create_table_task).await
}
DropTable(drop_table_task) => {
handle_drop_table_task(self, cluster_id, drop_table_task).await
} }
DropTable(drop_table_task) => handle_drop_table_task(self, drop_table_task).await,
AlterTable(alter_table_task) => { AlterTable(alter_table_task) => {
handle_alter_table_task(self, alter_table_task).await handle_alter_table_task(self, cluster_id, alter_table_task).await
} }
TruncateTable(truncate_table_task) => { TruncateTable(truncate_table_task) => {
handle_truncate_table_task(self, truncate_table_task).await handle_truncate_table_task(self, cluster_id, truncate_table_task).await
} }
CreateLogicalTables(create_table_tasks) => { CreateLogicalTables(create_table_tasks) => {
handle_create_logical_table_tasks(self, create_table_tasks).await handle_create_logical_table_tasks(self, cluster_id, create_table_tasks).await
} }
AlterLogicalTables(alter_table_tasks) => { AlterLogicalTables(alter_table_tasks) => {
handle_alter_logical_table_tasks(self, alter_table_tasks).await handle_alter_logical_table_tasks(self, cluster_id, alter_table_tasks).await
} }
DropLogicalTables(_) => todo!(), DropLogicalTables(_) => todo!(),
CreateDatabase(create_database_task) => { CreateDatabase(create_database_task) => {
handle_create_database_task(self, create_database_task).await handle_create_database_task(self, cluster_id, create_database_task).await
} }
DropDatabase(drop_database_task) => { DropDatabase(drop_database_task) => {
handle_drop_database_task(self, drop_database_task).await handle_drop_database_task(self, cluster_id, drop_database_task).await
} }
AlterDatabase(alter_database_task) => { AlterDatabase(alter_database_task) => {
handle_alter_database_task(self, alter_database_task).await handle_alter_database_task(self, cluster_id, alter_database_task).await
} }
CreateFlow(create_flow_task) => { CreateFlow(create_flow_task) => {
handle_create_flow_task(self, create_flow_task, request.query_context.into()) handle_create_flow_task(
.await self,
cluster_id,
create_flow_task,
request.query_context.into(),
)
.await
}
DropFlow(drop_flow_task) => {
handle_drop_flow_task(self, cluster_id, drop_flow_task).await
} }
DropFlow(drop_flow_task) => handle_drop_flow_task(self, drop_flow_task).await,
CreateView(create_view_task) => { CreateView(create_view_task) => {
handle_create_view_task(self, create_view_task).await handle_create_view_task(self, cluster_id, create_view_task).await
}
DropView(drop_view_task) => {
handle_drop_view_task(self, cluster_id, drop_view_task).await
} }
DropView(drop_view_task) => handle_drop_view_task(self, drop_view_task).await,
} }
} }
.trace(span) .trace(span)
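All of the handle_*_task helpers above receive the same cluster_id, which submit_ddl_task pulls from the executor context via ctx.cluster_id.unwrap_or_default() (so it falls back to 0 when unset). From a caller's point of view the submit_* methods now name the cluster explicitly; a short sketch mirroring handle_alter_table_task:
// Caller-side sketch: the cluster id is passed alongside the task.
let (procedure_id, _output) = ddl_manager
    .submit_alter_table_task(cluster_id, table_id, alter_table_task)
    .await?;
info!("Table: {table_id} is altered via procedure_id {procedure_id:?}");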

View File

@@ -26,10 +26,11 @@ use crate::flow_name::FlowName;
use crate::key::schema_name::SchemaName; use crate::key::schema_name::SchemaName;
use crate::key::FlowId; use crate::key::FlowId;
use crate::peer::Peer; use crate::peer::Peer;
use crate::{DatanodeId, FlownodeId}; use crate::{ClusterId, DatanodeId, FlownodeId};
#[derive(Eq, Hash, PartialEq, Clone, Debug, Serialize, Deserialize)] #[derive(Eq, Hash, PartialEq, Clone, Debug, Serialize, Deserialize)]
pub struct RegionIdent { pub struct RegionIdent {
pub cluster_id: ClusterId,
pub datanode_id: DatanodeId, pub datanode_id: DatanodeId,
pub table_id: TableId, pub table_id: TableId,
pub region_number: RegionNumber, pub region_number: RegionNumber,
@@ -46,8 +47,8 @@ impl Display for RegionIdent {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!( write!(
f, f,
"RegionIdent(datanode_id='{}', table_id={}, region_number={}, engine = {})", "RegionIdent(datanode_id='{}.{}', table_id={}, region_number={}, engine = {})",
self.datanode_id, self.table_id, self.region_number, self.engine self.cluster_id, self.datanode_id, self.table_id, self.region_number, self.engine
) )
} }
} }
@@ -261,6 +262,7 @@ mod tests {
fn test_serialize_instruction() { fn test_serialize_instruction() {
let open_region = Instruction::OpenRegion(OpenRegion::new( let open_region = Instruction::OpenRegion(OpenRegion::new(
RegionIdent { RegionIdent {
cluster_id: 1,
datanode_id: 2, datanode_id: 2,
table_id: 1024, table_id: 1024,
region_number: 1, region_number: 1,
@@ -275,11 +277,12 @@ mod tests {
let serialized = serde_json::to_string(&open_region).unwrap(); let serialized = serde_json::to_string(&open_region).unwrap();
assert_eq!( assert_eq!(
r#"{"OpenRegion":{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}}"#, r#"{"OpenRegion":{"region_ident":{"cluster_id":1,"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}}"#,
serialized serialized
); );
let close_region = Instruction::CloseRegion(RegionIdent { let close_region = Instruction::CloseRegion(RegionIdent {
cluster_id: 1,
datanode_id: 2, datanode_id: 2,
table_id: 1024, table_id: 1024,
region_number: 1, region_number: 1,
@@ -289,7 +292,7 @@ mod tests {
let serialized = serde_json::to_string(&close_region).unwrap(); let serialized = serde_json::to_string(&close_region).unwrap();
assert_eq!( assert_eq!(
r#"{"CloseRegion":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}}"#, r#"{"CloseRegion":{"cluster_id":1,"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}}"#,
serialized serialized
); );
} }
@@ -304,6 +307,7 @@ mod tests {
#[test] #[test]
fn test_compatible_serialize_open_region() { fn test_compatible_serialize_open_region() {
let region_ident = RegionIdent { let region_ident = RegionIdent {
cluster_id: 1,
datanode_id: 2, datanode_id: 2,
table_id: 1024, table_id: 1024,
region_number: 1, region_number: 1,
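Since RegionIdent now carries cluster_id, its wire format changes exactly as the updated assertions above show. A small round-trip sketch, using the field set from this diff and serde_json (serde serializes fields in declaration order, which the updated test relies on):
let ident = RegionIdent {
    cluster_id: 1,
    datanode_id: 2,
    table_id: 1024,
    region_number: 1,
    engine: "mito2".to_string(),
};
// The serialized form now leads with the cluster id, matching the new test expectation.
let serialized = serde_json::to_string(&ident).unwrap();
assert_eq!(
    r#"{"cluster_id":1,"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}"#,
    serialized
);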

View File

@@ -47,6 +47,8 @@ pub mod test_util;
pub mod util; pub mod util;
pub mod wal_options_allocator; pub mod wal_options_allocator;
// The id of the cluster.
pub type ClusterId = u64;
// The id of the datanode. // The id of the datanode.
pub type DatanodeId = u64; pub type DatanodeId = u64;
// The id of the flownode. // The id of the flownode.

View File

@@ -99,7 +99,7 @@ impl NodeExpiryListener {
in_memory: &ResettableKvBackendRef, in_memory: &ResettableKvBackendRef,
max_idle_time: Duration, max_idle_time: Duration,
) -> error::Result<impl Iterator<Item = NodeInfoKey>> { ) -> error::Result<impl Iterator<Item = NodeInfoKey>> {
let prefix = NodeInfoKey::key_prefix(); let prefix = NodeInfoKey::key_prefix_with_cluster_id(0);
let req = RangeRequest::new().with_prefix(prefix); let req = RangeRequest::new().with_prefix(prefix);
let current_time_millis = common_time::util::current_time_millis(); let current_time_millis = common_time::util::current_time_millis();
let resp = in_memory.range(req).await?; let resp = in_memory.range(req).await?;

View File

@@ -19,7 +19,7 @@ use api::v1::meta::Peer as PbPeer;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::error::Error; use crate::error::Error;
use crate::{DatanodeId, FlownodeId}; use crate::{ClusterId, DatanodeId, FlownodeId};
#[derive(Debug, Default, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)] #[derive(Debug, Default, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)]
pub struct Peer { pub struct Peer {
@@ -72,8 +72,8 @@ impl Display for Peer {
/// can query peer given a node id /// can query peer given a node id
#[async_trait::async_trait] #[async_trait::async_trait]
pub trait PeerLookupService { pub trait PeerLookupService {
async fn datanode(&self, id: DatanodeId) -> Result<Option<Peer>, Error>; async fn datanode(&self, cluster_id: ClusterId, id: DatanodeId) -> Result<Option<Peer>, Error>;
async fn flownode(&self, id: FlownodeId) -> Result<Option<Peer>, Error>; async fn flownode(&self, cluster_id: ClusterId, id: FlownodeId) -> Result<Option<Peer>, Error>;
} }
pub type PeerLookupServiceRef = Arc<dyn PeerLookupService + Send + Sync>; pub type PeerLookupServiceRef = Arc<dyn PeerLookupService + Send + Sync>;
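Implementations of PeerLookupService now receive the cluster id on both lookups; the NoopPeerLookupService further down in this diff is the simplest example. A sketch of a custom implementor under the same signature (the struct name here is hypothetical, for illustration only):
struct StaticPeerLookup;

#[async_trait::async_trait]
impl PeerLookupService for StaticPeerLookup {
    async fn datanode(&self, _cluster_id: ClusterId, id: DatanodeId) -> Result<Option<Peer>, Error> {
        // A real implementation would scope the lookup by cluster_id.
        Ok(Some(Peer::empty(id)))
    }

    async fn flownode(&self, _cluster_id: ClusterId, id: FlownodeId) -> Result<Option<Peer>, Error> {
        Ok(Some(Peer::empty(id)))
    }
}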

View File

@@ -31,6 +31,11 @@ impl ResponseHeader {
self.0.protocol_version self.0.protocol_version
} }
#[inline]
pub fn cluster_id(&self) -> u64 {
self.0.cluster_id
}
#[inline] #[inline]
pub fn error_code(&self) -> i32 { pub fn error_code(&self) -> i32 {
match self.0.error.as_ref() { match self.0.error.as_ref() {
@@ -138,6 +143,7 @@ mod tests {
fn test_response_header_trans() { fn test_response_header_trans() {
let pb_header = PbResponseHeader { let pb_header = PbResponseHeader {
protocol_version: 101, protocol_version: 101,
cluster_id: 1,
error: Some(Error { error: Some(Error {
code: 100, code: 100,
err_msg: "test".to_string(), err_msg: "test".to_string(),
@@ -146,6 +152,7 @@ mod tests {
let header = ResponseHeader(pb_header); let header = ResponseHeader(pb_header);
assert_eq!(101, header.protocol_version()); assert_eq!(101, header.protocol_version());
assert_eq!(1, header.cluster_id());
assert_eq!(100, header.error_code()); assert_eq!(100, header.error_code());
assert_eq!("test".to_string(), header.error_msg()); assert_eq!("test".to_string(), header.error_msg());
} }
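The new accessor simply exposes the cluster id already present in the protobuf header. A hedged usage sketch, reusing the same tuple-struct construction as the test above and assuming the prost-generated Default for the remaining fields:
let header = ResponseHeader(PbResponseHeader {
    protocol_version: 101,
    cluster_id: 1,
    ..Default::default()
});
// cluster_id() reads straight from the underlying protobuf message.
assert_eq!(1, header.cluster_id());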

View File

@@ -37,7 +37,7 @@ use crate::peer::{Peer, PeerLookupService};
use crate::region_keeper::MemoryRegionKeeper; use crate::region_keeper::MemoryRegionKeeper;
use crate::sequence::SequenceBuilder; use crate::sequence::SequenceBuilder;
use crate::wal_options_allocator::WalOptionsAllocator; use crate::wal_options_allocator::WalOptionsAllocator;
use crate::{DatanodeId, FlownodeId}; use crate::{ClusterId, DatanodeId, FlownodeId};
#[async_trait::async_trait] #[async_trait::async_trait]
pub trait MockDatanodeHandler: Sync + Send + Clone { pub trait MockDatanodeHandler: Sync + Send + Clone {
@@ -189,11 +189,11 @@ pub struct NoopPeerLookupService;
#[async_trait::async_trait] #[async_trait::async_trait]
impl PeerLookupService for NoopPeerLookupService { impl PeerLookupService for NoopPeerLookupService {
async fn datanode(&self, id: DatanodeId) -> Result<Option<Peer>> { async fn datanode(&self, _cluster_id: ClusterId, id: DatanodeId) -> Result<Option<Peer>> {
Ok(Some(Peer::empty(id))) Ok(Some(Peer::empty(id)))
} }
async fn flownode(&self, id: FlownodeId) -> Result<Option<Peer>> { async fn flownode(&self, _cluster_id: ClusterId, id: FlownodeId) -> Result<Option<Peer>> {
Ok(Some(Peer::empty(id))) Ok(Some(Peer::empty(id)))
} }
} }

View File

@@ -24,6 +24,7 @@ use datatypes::arrow::datatypes::DataType as ArrowDatatype;
use datatypes::error::Error as DataTypeError; use datatypes::error::Error as DataTypeError;
use datatypes::prelude::ConcreteDataType; use datatypes::prelude::ConcreteDataType;
use snafu::{Location, Snafu}; use snafu::{Location, Snafu};
use statrs::StatsError;
#[derive(Snafu)] #[derive(Snafu)]
#[snafu(visibility(pub))] #[snafu(visibility(pub))]
@@ -37,6 +38,14 @@ pub enum Error {
location: Location, location: Location,
}, },
#[snafu(display("Failed to generate function"))]
GenerateFunction {
#[snafu(source)]
error: StatsError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to cast scalar value into vector"))] #[snafu(display("Failed to cast scalar value into vector"))]
FromScalarValue { FromScalarValue {
#[snafu(implicit)] #[snafu(implicit)]
@@ -88,6 +97,12 @@ pub enum Error {
location: Location, location: Location,
}, },
#[snafu(display("unexpected: not constant column"))]
InvalidInputCol {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("General DataFusion error"))] #[snafu(display("General DataFusion error"))]
GeneralDataFusion { GeneralDataFusion {
#[snafu(source)] #[snafu(source)]
@@ -233,6 +248,8 @@ impl ErrorExt for Error {
Error::CreateAccumulator { .. } Error::CreateAccumulator { .. }
| Error::DowncastVector { .. } | Error::DowncastVector { .. }
| Error::InvalidInputState { .. } | Error::InvalidInputState { .. }
| Error::InvalidInputCol { .. }
| Error::GenerateFunction { .. }
| Error::BadAccumulatorImpl { .. } | Error::BadAccumulatorImpl { .. }
| Error::ToScalarValue { .. } | Error::ToScalarValue { .. }
| Error::GetScalarVector { .. } | Error::GetScalarVector { .. }
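Both new variants are classified as non-retryable internal errors; GenerateFunction wraps a statrs::StatsError as its source. A sketch of how a function body might surface such an error, assuming the usual snafu ResultExt pattern and the default Snafu-suffixed context selector used in this crate:
use snafu::ResultExt; // assumed import for the context() combinator

fn build_normal(mean: f64, std_dev: f64) -> Result<statrs::distribution::Normal, Error> {
    // Normal::new returns Result<_, StatsError>; context() converts it into GenerateFunction.
    statrs::distribution::Normal::new(mean, std_dev).context(GenerateFunctionSnafu)
}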

View File

@@ -235,6 +235,7 @@ mod tests {
Instruction::CloseRegion(RegionIdent { Instruction::CloseRegion(RegionIdent {
table_id: region_id.table_id(), table_id: region_id.table_id(),
region_number: region_id.region_number(), region_number: region_id.region_number(),
cluster_id: 1,
datanode_id: 2, datanode_id: 2,
engine: MITO_ENGINE_NAME.to_string(), engine: MITO_ENGINE_NAME.to_string(),
}) })
@@ -245,6 +246,7 @@ mod tests {
RegionIdent { RegionIdent {
table_id: region_id.table_id(), table_id: region_id.table_id(),
region_number: region_id.region_number(), region_number: region_id.region_number(),
cluster_id: 1,
datanode_id: 2, datanode_id: 2,
engine: MITO_ENGINE_NAME.to_string(), engine: MITO_ENGINE_NAME.to_string(),
}, },

View File

@@ -597,7 +597,7 @@ impl fmt::Display for FulltextAnalyzer {
} }
/// Skipping options for a column. /// Skipping options for a column.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)] #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")] #[serde(rename_all = "kebab-case")]
pub struct SkippingIndexOptions { pub struct SkippingIndexOptions {
/// The granularity of the skip index. /// The granularity of the skip index.
@@ -607,15 +607,6 @@ pub struct SkippingIndexOptions {
pub index_type: SkippingIndexType, pub index_type: SkippingIndexType,
} }
impl Default for SkippingIndexOptions {
fn default() -> Self {
Self {
granularity: DEFAULT_GRANULARITY,
index_type: SkippingIndexType::default(),
}
}
}
impl fmt::Display for SkippingIndexOptions { impl fmt::Display for SkippingIndexOptions {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "granularity={}", self.granularity)?; write!(f, "granularity={}", self.granularity)?;

View File

@@ -16,7 +16,6 @@ async-trait.workspace = true
bytes.workspace = true bytes.workspace = true
cache.workspace = true cache.workspace = true
catalog.workspace = true catalog.workspace = true
chrono.workspace = true
client.workspace = true client.workspace = true
common-base.workspace = true common-base.workspace = true
common-config.workspace = true common-config.workspace = true

View File

@@ -49,13 +49,12 @@ pub(crate) use crate::adapter::node_context::FlownodeContext;
use crate::adapter::refill::RefillTask; use crate::adapter::refill::RefillTask;
use crate::adapter::table_source::ManagedTableSource; use crate::adapter::table_source::ManagedTableSource;
use crate::adapter::util::relation_desc_to_column_schemas_with_fallback; use crate::adapter::util::relation_desc_to_column_schemas_with_fallback;
pub(crate) use crate::adapter::worker::{create_worker, WorkerHandle}; pub(crate) use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
use crate::compute::ErrCollector; use crate::compute::ErrCollector;
use crate::df_optimizer::sql_to_flow_plan; use crate::df_optimizer::sql_to_flow_plan;
use crate::error::{EvalSnafu, ExternalSnafu, InternalSnafu, InvalidQuerySnafu, UnexpectedSnafu}; use crate::error::{EvalSnafu, ExternalSnafu, InternalSnafu, InvalidQuerySnafu, UnexpectedSnafu};
use crate::expr::Batch; use crate::expr::Batch;
use crate::metrics::{METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_ROWS, METRIC_FLOW_RUN_INTERVAL_MS}; use crate::metrics::{METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_ROWS, METRIC_FLOW_RUN_INTERVAL_MS};
use crate::recording_rules::RecordingRuleEngine;
use crate::repr::{self, DiffRow, RelationDesc, Row, BATCH_SIZE}; use crate::repr::{self, DiffRow, RelationDesc, Row, BATCH_SIZE};
mod flownode_impl; mod flownode_impl;
@@ -64,7 +63,7 @@ pub(crate) mod refill;
mod stat; mod stat;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
pub(crate) mod util; mod util;
mod worker; mod worker;
pub(crate) mod node_context; pub(crate) mod node_context;
@@ -104,6 +103,7 @@ impl Default for FlowConfig {
#[serde(default)] #[serde(default)]
pub struct FlownodeOptions { pub struct FlownodeOptions {
pub mode: Mode, pub mode: Mode,
pub cluster_id: Option<u64>,
pub node_id: Option<u64>, pub node_id: Option<u64>,
pub flow: FlowConfig, pub flow: FlowConfig,
pub grpc: GrpcOptions, pub grpc: GrpcOptions,
@@ -118,6 +118,7 @@ impl Default for FlownodeOptions {
fn default() -> Self { fn default() -> Self {
Self { Self {
mode: servers::Mode::Standalone, mode: servers::Mode::Standalone,
cluster_id: None,
node_id: None, node_id: None,
flow: FlowConfig::default(), flow: FlowConfig::default(),
grpc: GrpcOptions::default().with_bind_addr("127.0.0.1:3004"), grpc: GrpcOptions::default().with_bind_addr("127.0.0.1:3004"),
@@ -170,8 +171,6 @@ pub struct FlowWorkerManager {
flush_lock: RwLock<()>, flush_lock: RwLock<()>,
/// receive a oneshot sender to send state size report /// receive a oneshot sender to send state size report
state_report_handler: RwLock<Option<StateReportHandler>>, state_report_handler: RwLock<Option<StateReportHandler>>,
/// engine for recording rule
rule_engine: RecordingRuleEngine,
} }
/// Building FlownodeManager /// Building FlownodeManager
@@ -186,7 +185,6 @@ impl FlowWorkerManager {
node_id: Option<u32>, node_id: Option<u32>,
query_engine: Arc<dyn QueryEngine>, query_engine: Arc<dyn QueryEngine>,
table_meta: TableMetadataManagerRef, table_meta: TableMetadataManagerRef,
rule_engine: RecordingRuleEngine,
) -> Self { ) -> Self {
let srv_map = ManagedTableSource::new( let srv_map = ManagedTableSource::new(
table_meta.table_info_manager().clone(), table_meta.table_info_manager().clone(),
@@ -209,7 +207,6 @@ impl FlowWorkerManager {
node_id, node_id,
flush_lock: RwLock::new(()), flush_lock: RwLock::new(()),
state_report_handler: RwLock::new(None), state_report_handler: RwLock::new(None),
rule_engine,
} }
} }
@@ -218,6 +215,25 @@ impl FlowWorkerManager {
self self
} }
/// Create a flownode manager with one worker
pub fn new_with_workers<'s>(
node_id: Option<u32>,
query_engine: Arc<dyn QueryEngine>,
table_meta: TableMetadataManagerRef,
num_workers: usize,
) -> (Self, Vec<Worker<'s>>) {
let mut zelf = Self::new(node_id, query_engine, table_meta);
let workers: Vec<_> = (0..num_workers)
.map(|_| {
let (handle, worker) = create_worker();
zelf.add_worker_handle(handle);
worker
})
.collect();
(zelf, workers)
}
/// add a worker handler to manager, meaning this corresponding worker is under it's manage /// add a worker handler to manager, meaning this corresponding worker is under it's manage
pub fn add_worker_handle(&mut self, handle: WorkerHandle) { pub fn add_worker_handle(&mut self, handle: WorkerHandle) {
self.worker_handles.push(handle); self.worker_handles.push(handle);
@@ -735,11 +751,7 @@ pub struct CreateFlowArgs {
/// Create&Remove flow
impl FlowWorkerManager {
/// remove a flow by it's id
#[allow(unreachable_code)]
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
// TODO(discord9): reroute some back to streaming engine later
return self.rule_engine.remove_flow(flow_id).await;
for handle in self.worker_handles.iter() {
if handle.contains_flow(flow_id).await? {
handle.remove_flow(flow_id).await?;
@@ -755,10 +767,8 @@ impl FlowWorkerManager {
/// steps to create task:
/// 1. parse query into typed plan(and optional parse expire_after expr)
/// 2. render source/sink with output table id and used input table id
#[allow(clippy::too_many_arguments, unreachable_code)] #[allow(clippy::too_many_arguments)]
pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
// TODO(discord9): reroute some back to streaming engine later
return self.rule_engine.create_flow(args).await;
let CreateFlowArgs {
flow_id,
sink_table_name,

View File

@@ -153,13 +153,7 @@ impl Flownode for FlowWorkerManager {
}
}
#[allow(unreachable_code, unused)]
async fn handle_inserts(&self, request: InsertRequests) -> Result<FlowResponse> {
return self
.rule_engine
.handle_inserts(request)
.await
.map_err(to_meta_err(snafu::location!()));
// using try_read to ensure two things:
// 1. flush wouldn't happen until inserts before it is inserted
// 2. inserts happening concurrently with flush wouldn't be block by flush
@@ -212,15 +206,15 @@ impl Flownode for FlowWorkerManager {
.collect_vec();
let table_col_names = table_schema.relation_desc.names;
let table_col_names = table_col_names
.iter().enumerate()
.map(|(idx,name)| match name {
Some(name) => Ok(name.clone()),
None => InternalSnafu {
reason: format!("Expect column {idx} of table id={table_id} to have name in table schema, found None"),
}
.fail().map_err(BoxedError::new).context(ExternalSnafu),
})
.collect::<Result<Vec<_>>>()?;
let name_to_col = HashMap::<_, _>::from_iter(
insert_schema
.iter()

View File

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! Some utility functions
use std::sync::Arc;
use api::helper::ColumnDataTypeWrapper;

View File

@@ -16,7 +16,6 @@
use std::any::Any;
use arrow_schema::ArrowError;
use common_error::ext::BoxedError;
use common_error::{define_into_tonic_status, from_err_code_msg_to_header};
use common_macro::stack_trace_debug;
@@ -54,13 +53,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Time error"))]
Time {
source: common_time::error::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("External error"))]
External {
source: BoxedError,
@@ -164,15 +156,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Arrow error: {raw:?} in context: {context}"))]
Arrow {
#[snafu(source)]
raw: ArrowError,
context: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Datafusion error: {raw:?} in context: {context}"))]
Datafusion {
#[snafu(source)]
@@ -247,7 +230,6 @@ impl ErrorExt for Error {
match self {
Self::Eval { .. }
| Self::JoinTask { .. }
| Self::Arrow { .. }
| Self::Datafusion { .. }
| Self::InsertIntoFlow { .. } => StatusCode::Internal,
Self::FlowAlreadyExist { .. } => StatusCode::TableAlreadyExists,
@@ -256,9 +238,7 @@ impl ErrorExt for Error {
| Self::FlowNotFound { .. }
| Self::ListFlows { .. } => StatusCode::TableNotFound,
Self::Plan { .. } | Self::Datatypes { .. } => StatusCode::PlanQuery,
Self::InvalidQuery { .. } | Self::CreateFlow { .. } | Self::Time { .. } => { Self::InvalidQuery { .. } | Self::CreateFlow { .. } => StatusCode::EngineExecuteQuery,
StatusCode::EngineExecuteQuery
}
Self::Unexpected { .. } => StatusCode::Unexpected,
Self::NotImplemented { .. } | Self::UnsupportedTemporalFilter { .. } => {
StatusCode::Unsupported

View File

@@ -238,7 +238,6 @@ mod test {
for (sql, current, expected) in &testcases {
let plan = sql_to_substrait(engine.clone(), sql).await;
let mut ctx = create_test_ctx();
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan)
.await

View File

@@ -130,6 +130,13 @@ impl HeartbeatTask {
pub fn shutdown(&self) {
info!("Close heartbeat task for flownode");
if self
.running
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire)
.is_err()
{
warn!("Call close heartbeat task multiple times");
}
}
fn new_heartbeat_request(

View File

@@ -33,7 +33,6 @@ mod expr;
pub mod heartbeat;
mod metrics;
mod plan;
mod recording_rules;
mod repr;
mod server;
mod transform;
@@ -44,5 +43,4 @@ mod test_utils;
pub use adapter::{FlowConfig, FlowWorkerManager, FlowWorkerManagerRef, FlownodeOptions};
pub use error::{Error, Result};
pub use recording_rules::FrontendClient;
pub use server::{FlownodeBuilder, FlownodeInstance, FlownodeServer, FrontendInvoker};

View File

@@ -28,32 +28,6 @@ lazy_static! {
&["table_id"] &["table_id"]
) )
.unwrap(); .unwrap();
pub static ref METRIC_FLOW_RULE_ENGINE_QUERY_TIME: HistogramVec = register_histogram_vec!(
"greptime_flow_rule_engine_query_time",
"flow rule engine query time",
&["flow_id"],
vec![
0.0,
1.,
3.,
5.,
10.,
20.,
30.,
60.,
2. * 60.,
5. * 60.,
10. * 60.
]
)
.unwrap();
pub static ref METRIC_FLOW_RULE_ENGINE_SLOW_QUERY: HistogramVec = register_histogram_vec!(
"greptime_flow_rule_engine_slow_query",
"flow rule engine slow query",
&["flow_id", "sql", "peer"],
vec![60., 2. * 60., 3. * 60., 5. * 60., 10. * 60.]
)
.unwrap();
pub static ref METRIC_FLOW_RUN_INTERVAL_MS: IntGauge =
register_int_gauge!("greptime_flow_run_interval_ms", "flow run interval in ms").unwrap();
pub static ref METRIC_FLOW_ROWS: IntCounterVec = register_int_counter_vec!(

View File

@@ -1,940 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Run flow as recording rule which is time-window-aware normal query triggered every tick set by user
mod engine;
mod frontend_client;
use std::collections::BTreeSet;
use std::sync::Arc;
use api::helper::pb_value_to_value_ref;
use catalog::CatalogManagerRef;
use common_error::ext::BoxedError;
use common_recordbatch::DfRecordBatch;
use common_telemetry::warn;
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use datafusion::error::Result as DfResult;
use datafusion::logical_expr::Expr;
use datafusion::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner};
use datafusion::prelude::SessionContext;
use datafusion::sql::unparser::Unparser;
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter};
use datafusion_common::{DFSchema, TableReference};
use datafusion_expr::{ColumnarValue, LogicalPlan};
use datafusion_physical_expr::PhysicalExprRef;
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::scalars::ScalarVector;
use datatypes::schema::TIME_INDEX_KEY;
use datatypes::value::Value;
use datatypes::vectors::{
TimestampMicrosecondVector, TimestampMillisecondVector, TimestampNanosecondVector,
TimestampSecondVector, Vector,
};
pub use engine::RecordingRuleEngine;
pub use frontend_client::FrontendClient;
use itertools::Itertools;
use query::parser::QueryLanguageParser;
use query::QueryEngineRef;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use crate::adapter::util::from_proto_to_data_type;
use crate::df_optimizer::apply_df_optimizer;
use crate::error::{ArrowSnafu, DatafusionSnafu, DatatypesSnafu, ExternalSnafu, UnexpectedSnafu};
use crate::expr::error::DataTypeSnafu;
use crate::Error;
#[derive(Debug, Clone)]
pub struct TimeWindowExpr {
phy_expr: PhysicalExprRef,
column_name: String,
logical_expr: Expr,
df_schema: DFSchema,
}
impl TimeWindowExpr {
pub fn from_expr(expr: &Expr, column_name: &str, df_schema: &DFSchema) -> Result<Self, Error> {
let phy_planner = DefaultPhysicalPlanner::default();
let phy_expr: PhysicalExprRef = phy_planner
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
.with_context(|_e| DatafusionSnafu {
context: format!(
"Failed to create physical expression from {expr:?} using {df_schema:?}"
),
})?;
Ok(Self {
phy_expr,
column_name: column_name.to_string(),
logical_expr: expr.clone(),
df_schema: df_schema.clone(),
})
}
pub fn eval(
&self,
current: Timestamp,
) -> Result<(Option<Timestamp>, Option<Timestamp>), Error> {
let lower_bound =
find_expr_time_window_lower_bound(&self.logical_expr, &self.df_schema, current)?;
let upper_bound =
find_expr_time_window_upper_bound(&self.logical_expr, &self.df_schema, current)?;
Ok((lower_bound, upper_bound))
}
/// Find timestamps from rows using time window expr
pub async fn handle_rows(
&self,
rows_list: Vec<api::v1::Rows>,
) -> Result<BTreeSet<Timestamp>, Error> {
let mut time_windows = BTreeSet::new();
for rows in rows_list {
// pick the time index column and use it to eval on `self.expr`
let ts_col_index = rows
.schema
.iter()
.map(|col| col.column_name.clone())
.position(|name| name == self.column_name);
let Some(ts_col_index) = ts_col_index else {
warn!("can't found time index column in schema: {:?}", rows.schema);
continue;
};
let col_schema = &rows.schema[ts_col_index];
let cdt = from_proto_to_data_type(col_schema)?;
let column_values = rows
.rows
.iter()
.map(|row| &row.values[ts_col_index])
.collect_vec();
let mut vector = cdt.create_mutable_vector(column_values.len());
for value in column_values {
let value = pb_value_to_value_ref(value, &None);
vector.try_push_value_ref(value).context(DataTypeSnafu {
msg: "Failed to convert rows to columns",
})?;
}
let vector = vector.to_vector();
let df_schema = create_df_schema_for_ts_column(&self.column_name, cdt)?;
let rb =
DfRecordBatch::try_new(df_schema.inner().clone(), vec![vector.to_arrow_array()])
.with_context(|_e| ArrowSnafu {
context: format!(
"Failed to create record batch from {df_schema:?} and {vector:?}"
),
})?;
let eval_res = self
.phy_expr
.evaluate(&rb)
.with_context(|_| DatafusionSnafu {
context: format!(
"Failed to evaluate physical expression {:?} on {rb:?}",
self.phy_expr
),
})?;
let res = columnar_to_ts_vector(&eval_res)?;
for ts in res.into_iter().flatten() {
time_windows.insert(ts);
}
}
Ok(time_windows)
}
}
fn create_df_schema_for_ts_column(name: &str, cdt: ConcreteDataType) -> Result<DFSchema, Error> {
let arrow_schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
name,
cdt.as_arrow_type(),
false,
)]));
let df_schema = DFSchema::from_field_specific_qualified_schema(
vec![Some(TableReference::bare("TimeIndexOnlyTable"))],
&arrow_schema,
)
.with_context(|_e| DatafusionSnafu {
context: format!("Failed to create DFSchema from arrow schema {arrow_schema:?}"),
})?;
Ok(df_schema)
}
/// Convert `ColumnarValue` to `Vec<Option<Timestamp>>`
fn columnar_to_ts_vector(columnar: &ColumnarValue) -> Result<Vec<Option<Timestamp>>, Error> {
let val = match columnar {
datafusion_expr::ColumnarValue::Array(array) => {
let ty = array.data_type();
let ty = ConcreteDataType::from_arrow_type(ty);
let time_unit = if let ConcreteDataType::Timestamp(ty) = ty {
ty.unit()
} else {
return UnexpectedSnafu {
reason: format!("Non-timestamp type: {ty:?}"),
}
.fail();
};
match time_unit {
TimeUnit::Second => TimestampSecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec(),
TimeUnit::Millisecond => {
TimestampMillisecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec()
}
TimeUnit::Microsecond => {
TimestampMicrosecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec()
}
TimeUnit::Nanosecond => {
TimestampNanosecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec()
}
}
}
datafusion_expr::ColumnarValue::Scalar(scalar) => {
let value = Value::try_from(scalar.clone()).with_context(|_| DatatypesSnafu {
extra: format!("Failed to convert scalar {scalar:?} to value"),
})?;
let ts = value.as_timestamp().context(UnexpectedSnafu {
reason: format!("Expect Timestamp, found {:?}", value),
})?;
vec![Some(ts)]
}
};
Ok(val)
}
/// Convert sql to datafusion logical plan
pub async fn sql_to_df_plan(
query_ctx: QueryContextRef,
engine: QueryEngineRef,
sql: &str,
optimize: bool,
) -> Result<LogicalPlan, Error> {
let stmt = QueryLanguageParser::parse_sql(sql, &query_ctx)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let plan = engine
.planner()
.plan(&stmt, query_ctx)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let plan = if optimize {
apply_df_optimizer(plan).await?
} else {
plan
};
Ok(plan)
}
/// Return (the column name of time index column, the time window expr, the expected time unit of time index column, the expr's schema for evaluating the time window)
async fn find_time_window_expr(
plan: &LogicalPlan,
catalog_man: CatalogManagerRef,
query_ctx: QueryContextRef,
) -> Result<(String, Option<datafusion_expr::Expr>, TimeUnit, DFSchema), Error> {
// TODO(discord9): find the expr that does the time window
let mut table_name = None;
// first find the table source in the logical plan
plan.apply(|plan| {
let LogicalPlan::TableScan(table_scan) = plan else {
return Ok(TreeNodeRecursion::Continue);
};
table_name = Some(table_scan.table_name.clone());
Ok(TreeNodeRecursion::Stop)
})
.with_context(|_| DatafusionSnafu {
context: format!("Can't find table source in plan {plan:?}"),
})?;
let Some(table_name) = table_name else {
UnexpectedSnafu {
reason: format!("Can't find table source in plan {plan:?}"),
}
.fail()?
};
let current_schema = query_ctx.current_schema();
let catalog_name = table_name.catalog().unwrap_or(query_ctx.current_catalog());
let schema_name = table_name.schema().unwrap_or(&current_schema);
let table_name = table_name.table();
let Some(table_ref) = catalog_man
.table(catalog_name, schema_name, table_name, Some(&query_ctx))
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
else {
UnexpectedSnafu {
reason: format!(
"Can't find table {table_name:?} in catalog {catalog_name:?}/{schema_name:?}"
),
}
.fail()?
};
let schema = &table_ref.table_info().meta.schema;
let ts_index = schema.timestamp_column().context(UnexpectedSnafu {
reason: format!("Can't find timestamp column in table {table_name:?}"),
})?;
let ts_col_name = ts_index.name.clone();
let expected_time_unit = ts_index.data_type.as_timestamp().with_context(|| UnexpectedSnafu {
reason: format!(
"Expected timestamp column {ts_col_name:?} in table {table_name:?} to be timestamp, but got {ts_index:?}"
),
})?.unit();
let arrow_schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
ts_col_name.clone(),
ts_index.data_type.as_arrow_type(),
false,
)]));
let df_schema = DFSchema::from_field_specific_qualified_schema(
vec![Some(TableReference::bare(table_name))],
&arrow_schema,
)
.with_context(|_e| DatafusionSnafu {
context: format!("Failed to create DFSchema from arrow schema {arrow_schema:?}"),
})?;
// find the time window expr which refers to the time index column
let mut aggr_expr = None;
let mut time_window_expr: Option<Expr> = None;
let find_inner_aggr_expr = |plan: &LogicalPlan| {
if let LogicalPlan::Aggregate(aggregate) = plan {
aggr_expr = Some(aggregate.clone());
};
Ok(TreeNodeRecursion::Continue)
};
plan.apply(find_inner_aggr_expr)
.with_context(|_| DatafusionSnafu {
context: format!("Can't find aggr expr in plan {plan:?}"),
})?;
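// Among the aggregate's GROUP BY expressions, pick the one whose only column
// reference is the table's time index field; that expression is treated as the
// time window expr. Its column is re-qualified with the table name so it can be
// evaluated later against a standalone single-column schema.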
if let Some(aggregate) = aggr_expr {
for group_expr in &aggregate.group_expr {
let refs = group_expr.column_refs();
if refs.len() != 1 {
continue;
}
let ref_col = refs.iter().next().unwrap();
let index = aggregate.input.schema().maybe_index_of_column(ref_col);
let Some(index) = index else {
continue;
};
let field = aggregate.input.schema().field(index);
let is_time_index = field.metadata().get(TIME_INDEX_KEY) == Some(&"true".to_string());
if is_time_index {
let rewrite_column = group_expr.clone();
let rewritten = rewrite_column
.rewrite(&mut RewriteColumn {
table_name: table_name.to_string(),
})
.with_context(|_| DatafusionSnafu {
context: format!("Rewrite expr failed, expr={:?}", group_expr),
})?
.data;
struct RewriteColumn {
table_name: String,
}
impl TreeNodeRewriter for RewriteColumn {
type Node = Expr;
fn f_down(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
let Expr::Column(mut column) = node else {
return Ok(Transformed::no(node));
};
column.relation = Some(TableReference::bare(self.table_name.clone()));
Ok(Transformed::yes(Expr::Column(column)))
}
}
time_window_expr = Some(rewritten);
break;
}
}
Ok((ts_col_name, time_window_expr, expected_time_unit, df_schema))
} else {
// can't find time window expr, return None
Ok((ts_col_name, None, expected_time_unit, df_schema))
}
}
/// Find nearest lower bound for time `current` in given `plan` for the time window expr.
/// i.e. for time window expr being `date_bin(INTERVAL '5 minutes', ts) as time_window` and `current="2021-07-01 00:01:01.000"`,
/// return `Some("2021-07-01 00:00:00.000")`
/// if `plan` doesn't contain a `TIME INDEX` column, return `None`
///
/// Time window expr is an expr that:
/// 1. ref only to a time index column
/// 2. is monotonic increasing
/// 3. show up in GROUP BY clause
///
/// note this plan should only contain one TableScan
pub async fn find_plan_time_window_bound(
plan: &LogicalPlan,
current: Timestamp,
query_ctx: QueryContextRef,
engine: QueryEngineRef,
) -> Result<(String, Option<Timestamp>, Option<Timestamp>), Error> {
// TODO(discord9): find the expr that does the time window
let catalog_man = engine.engine_state().catalog_manager();
let (ts_col_name, time_window_expr, expected_time_unit, df_schema) =
find_time_window_expr(plan, catalog_man.clone(), query_ctx).await?;
// cast current to ts_index's type
let new_current = current
.convert_to(expected_time_unit)
.with_context(|| UnexpectedSnafu {
reason: format!("Failed to cast current timestamp {current:?} to {expected_time_unit}"),
})?;
// if no time_window_expr is found, return None
if let Some(time_window_expr) = time_window_expr {
let lower_bound =
find_expr_time_window_lower_bound(&time_window_expr, &df_schema, new_current)?;
let upper_bound =
find_expr_time_window_upper_bound(&time_window_expr, &df_schema, new_current)?;
Ok((ts_col_name, lower_bound, upper_bound))
} else {
Ok((ts_col_name, None, None))
}
}
/// Find the lower bound of time window in given `expr` and `current` timestamp.
///
/// i.e. for `current="2021-07-01 00:01:01.000"` and `expr=date_bin(INTERVAL '5 minutes', ts) as time_window` and `ts_col=ts`,
/// return `Some("2021-07-01 00:00:00.000")` since it's the lower bound
/// of current time window given the current timestamp
///
/// if it returns None, this time window has no lower bound
fn find_expr_time_window_lower_bound(
expr: &Expr,
df_schema: &DFSchema,
current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
let phy_planner = DefaultPhysicalPlanner::default();
let phy_expr: PhysicalExprRef = phy_planner
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
.with_context(|_e| DatafusionSnafu {
context: format!(
"Failed to create physical expression from {expr:?} using {df_schema:?}"
),
})?;
let cur_time_window = eval_ts_to_ts(&phy_expr, df_schema, current)?;
let input_time_unit = cur_time_window.unit();
Ok(cur_time_window.convert_to(input_time_unit))
}
/// Find the upper bound for time window expression
fn find_expr_time_window_upper_bound(
expr: &Expr,
df_schema: &DFSchema,
current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
use std::cmp::Ordering;
let phy_planner = DefaultPhysicalPlanner::default();
let phy_expr: PhysicalExprRef = phy_planner
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
.with_context(|_e| DatafusionSnafu {
context: format!(
"Failed to create physical expression from {expr:?} using {df_schema:?}"
),
})?;
let cur_time_window = eval_ts_to_ts(&phy_expr, df_schema, current)?;
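// Strategy: the time window expr is assumed to be monotonic non-decreasing over
// the time index. Starting from `current` (which maps to `cur_time_window`),
// probe forward with exponentially growing offsets until the evaluated window
// changes; that brackets the boundary, which the binary search below pins down.
// If the offset overflows before the window changes, there is no upper bound.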
// search forward to bracket the end of the current time window
let mut offset: i64 = 1;
let mut lower_bound = Some(current);
let upper_bound;
// first do an exponential probe to find a range for the binary search
loop {
let Some(next_val) = current.value().checked_add(offset) else {
// no upper bound if overflow
return Ok(None);
};
let next_time_probe = common_time::Timestamp::new(next_val, current.unit());
let next_time_window = eval_ts_to_ts(&phy_expr, df_schema, next_time_probe)?;
match next_time_window.cmp(&cur_time_window) {
Ordering::Less => {UnexpectedSnafu {
reason: format!(
"Unsupported time window expression, expect monotonic increasing for time window expression {expr:?}"
),
}
.fail()?
}
Ordering::Equal => {
lower_bound = Some(next_time_probe);
}
Ordering::Greater => {
upper_bound = Some(next_time_probe);
break
}
}
let Some(new_offset) = offset.checked_mul(2) else {
// no upper bound if overflow
return Ok(None);
};
offset = new_offset;
}
// binary search for the exact upper bound
ensure!(lower_bound.map(|v|v.unit())==upper_bound.map(|v|v.unit()), UnexpectedSnafu{
reason: format!(" unit mismatch for time window expression {expr:?}, found {lower_bound:?} and {upper_bound:?}"),
});
let output_unit = upper_bound
.context(UnexpectedSnafu {
reason: "should have lower bound",
})?
.unit();
let mut low = lower_bound
.context(UnexpectedSnafu {
reason: "should have lower bound",
})?
.value();
let mut high = upper_bound
.context(UnexpectedSnafu {
reason: "should have upper bound",
})?
.value();
while low < high {
let mid = (low + high) / 2;
let mid_probe = common_time::Timestamp::new(mid, output_unit);
let mid_time_window = eval_ts_to_ts(&phy_expr, df_schema, mid_probe)?;
match mid_time_window.cmp(&cur_time_window) {
Ordering::Less => UnexpectedSnafu {
reason: format!("Binary search failed for time window expression {expr:?}"),
}
.fail()?,
Ordering::Equal => low = mid + 1,
Ordering::Greater => high = mid,
}
}
let final_upper_bound_for_time_window = common_time::Timestamp::new(high, output_unit);
Ok(Some(final_upper_bound_for_time_window))
}
fn eval_ts_to_ts(
phy: &PhysicalExprRef,
df_schema: &DFSchema,
input_value: Timestamp,
) -> Result<Timestamp, Error> {
let schema_ty = df_schema.field(0).data_type();
let schema_cdt = ConcreteDataType::from_arrow_type(schema_ty);
let schema_unit = if let ConcreteDataType::Timestamp(ts) = schema_cdt {
ts.unit()
} else {
return UnexpectedSnafu {
reason: format!("Expect Timestamp, found {:?}", schema_cdt),
}
.fail();
};
let input_value = input_value
.convert_to(schema_unit)
.with_context(|| UnexpectedSnafu {
reason: format!("Failed to convert timestamp {input_value:?} to {schema_unit}"),
})?;
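// Build a single-row record batch holding just the probe timestamp (converted to
// the schema's time unit) and evaluate the physical expr on it; the result is the
// time window value that `input_value` maps to.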
let ts_vector = match schema_unit {
TimeUnit::Second => {
TimestampSecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
TimeUnit::Millisecond => {
TimestampMillisecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
TimeUnit::Microsecond => {
TimestampMicrosecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
TimeUnit::Nanosecond => {
TimestampNanosecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
};
let rb = DfRecordBatch::try_new(df_schema.inner().clone(), vec![ts_vector.clone()])
.with_context(|_| ArrowSnafu {
context: format!("Failed to create record batch from {df_schema:?} and {ts_vector:?}"),
})?;
let eval_res = phy.evaluate(&rb).with_context(|_| DatafusionSnafu {
context: format!("Failed to evaluate physical expression {phy:?} on {rb:?}"),
})?;
if let Some(Some(ts)) = columnar_to_ts_vector(&eval_res)?.first() {
Ok(*ts)
} else {
UnexpectedSnafu {
reason: format!(
"Expected timestamp in expression {phy:?} but got {:?}",
eval_res
),
}
.fail()?
}
}
// TODO(discord9): a method to find out the precise time window
/// Find out the `Filter` Node corresponding to outermost `WHERE` and add a new filter expr to it
#[derive(Debug)]
pub struct AddFilterRewriter {
extra_filter: Expr,
is_rewritten: bool,
}
impl AddFilterRewriter {
fn new(filter: Expr) -> Self {
Self {
extra_filter: filter,
is_rewritten: false,
}
}
}
impl TreeNodeRewriter for AddFilterRewriter {
type Node = LogicalPlan;
fn f_up(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
if self.is_rewritten {
return Ok(Transformed::no(node));
}
match node {
LogicalPlan::Filter(mut filter) if !filter.having => {
filter.predicate = filter.predicate.and(self.extra_filter.clone());
self.is_rewritten = true;
Ok(Transformed::yes(LogicalPlan::Filter(filter)))
}
LogicalPlan::TableScan(_) => {
// add a new filter
let filter =
datafusion_expr::Filter::try_new(self.extra_filter.clone(), Arc::new(node))?;
self.is_rewritten = true;
Ok(Transformed::yes(LogicalPlan::Filter(filter)))
}
_ => Ok(Transformed::no(node)),
}
}
}
fn df_plan_to_sql(plan: &LogicalPlan) -> Result<String, Error> {
/// A dialect that quotes identifiers containing uppercase characters
struct ForceQuoteIdentifiers;
impl datafusion::sql::unparser::dialect::Dialect for ForceQuoteIdentifiers {
fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
if identifier.to_lowercase() != identifier {
Some('"')
} else {
None
}
}
}
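// Quoting such identifiers keeps case-sensitive table/column names (e.g. "NUMBER")
// intact when the plan is unparsed back to SQL, as exercised by the test below.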
let unparser = Unparser::new(&ForceQuoteIdentifiers);
// first make all column qualified
let sql = unparser
.plan_to_sql(plan)
.with_context(|_e| DatafusionSnafu {
context: format!("Failed to unparse logical plan {plan:?}"),
})?;
Ok(sql.to_string())
}
#[cfg(test)]
mod test {
use datafusion_common::tree_node::TreeNode;
use pretty_assertions::assert_eq;
use session::context::QueryContext;
use super::{sql_to_df_plan, *};
use crate::recording_rules::{df_plan_to_sql, AddFilterRewriter};
use crate::test_utils::create_test_query_engine;
#[tokio::test]
async fn test_sql_plan_convert() {
let query_engine = create_test_query_engine();
let ctx = QueryContext::arc();
let old = r#"SELECT "NUMBER" FROM "UPPERCASE_NUMBERS_WITH_TS""#;
let new = sql_to_df_plan(ctx.clone(), query_engine.clone(), old, false)
.await
.unwrap();
let new_sql = df_plan_to_sql(&new).unwrap();
assert_eq!(
r#"SELECT "UPPERCASE_NUMBERS_WITH_TS"."NUMBER" FROM "UPPERCASE_NUMBERS_WITH_TS""#,
new_sql
);
}
#[tokio::test]
async fn test_add_filter() {
let testcases = vec![
(
"SELECT number FROM numbers_with_ts GROUP BY number","SELECT numbers_with_ts.number FROM numbers_with_ts WHERE (number > 4) GROUP BY numbers_with_ts.number"
),
(
"SELECT number FROM numbers_with_ts WHERE number < 2 OR number >10",
"SELECT numbers_with_ts.number FROM numbers_with_ts WHERE ((numbers_with_ts.number < 2) OR (numbers_with_ts.number > 10)) AND (number > 4)"
),
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window",
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE (number > 4) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
)
];
use datafusion_expr::{col, lit};
let query_engine = create_test_query_engine();
let ctx = QueryContext::arc();
for (before, after) in testcases {
let sql = before;
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, false)
.await
.unwrap();
let mut add_filter = AddFilterRewriter::new(col("number").gt(lit(4u32)));
let plan = plan.rewrite(&mut add_filter).unwrap().data;
let new_sql = df_plan_to_sql(&plan).unwrap();
assert_eq!(after, new_sql);
}
}
#[tokio::test]
async fn test_plan_time_window_lower_bound() {
use datafusion_expr::{col, lit};
let query_engine = create_test_query_engine();
let ctx = QueryContext::arc();
let testcases = [
// same alias is not same column
(
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts GROUP BY ts;",
Timestamp::new(1740394109, TimeUnit::Second),
(
"ts".to_string(),
Some(Timestamp::new(1740394109000, TimeUnit::Millisecond)),
Some(Timestamp::new(1740394109001, TimeUnit::Millisecond)),
),
r#"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:29' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:48:29.001' AS TIMESTAMP))) GROUP BY numbers_with_ts.ts"#
),
// complex time window index
(
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(1740394109, TimeUnit::Second),
(
"ts".to_string(),
Some(Timestamp::new(1740394080, TimeUnit::Second)),
Some(Timestamp::new(1740394140, TimeUnit::Second)),
),
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:00' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:49:00' AS TIMESTAMP))) GROUP BY arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)')"
),
// no time index
(
"SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;",
Timestamp::new(23, TimeUnit::Millisecond),
("ts".to_string(), None, None),
"SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;"
),
// time index
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// on spot
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(0, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// different time unit
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23_000_000, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// time index with other fields
(
"SELECT sum(number) as sum_up, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT sum(numbers_with_ts.number) AS sum_up, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// time index with other pks
(
"SELECT number, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window, number;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number"
),
// subquery
(
"SELECT number, time_window FROM (SELECT number, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window, number);",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT numbers_with_ts.number, time_window FROM (SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number)"
),
// cte
(
"with cte as (select number, date_bin('5 minutes', ts) as time_window from numbers_with_ts GROUP BY time_window, number) select number, time_window from cte;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT cte.number, cte.time_window FROM (SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number) AS cte"
),
// complex subquery without alias
(
"SELECT sum(number), number, date_bin('5 minutes', ts) as time_window, bucket_name FROM (SELECT number, ts, case when number < 5 THEN 'bucket_0_5' when number >= 5 THEN 'bucket_5_inf' END as bucket_name FROM numbers_with_ts) GROUP BY number, time_window, bucket_name;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT sum(numbers_with_ts.number), numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window, bucket_name FROM (SELECT numbers_with_ts.number, numbers_with_ts.ts, CASE WHEN (numbers_with_ts.number < 5) THEN 'bucket_0_5' WHEN (numbers_with_ts.number >= 5) THEN 'bucket_5_inf' END AS bucket_name FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP)))) GROUP BY numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts), bucket_name"
),
// complex subquery alias
(
"SELECT sum(number), number, date_bin('5 minutes', ts) as time_window, bucket_name FROM (SELECT number, ts, case when number < 5 THEN 'bucket_0_5' when number >= 5 THEN 'bucket_5_inf' END as bucket_name FROM numbers_with_ts) as cte GROUP BY number, time_window, bucket_name;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT sum(cte.number), cte.number, date_bin('5 minutes', cte.ts) AS time_window, cte.bucket_name FROM (SELECT numbers_with_ts.number, numbers_with_ts.ts, CASE WHEN (numbers_with_ts.number < 5) THEN 'bucket_0_5' WHEN (numbers_with_ts.number >= 5) THEN 'bucket_5_inf' END AS bucket_name FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP)))) AS cte GROUP BY cte.number, date_bin('5 minutes', cte.ts), cte.bucket_name"
),
];
for (sql, current, expected, expected_unparsed) in testcases {
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, true)
.await
.unwrap();
let real =
find_plan_time_window_bound(&plan, current, ctx.clone(), query_engine.clone())
.await
.unwrap();
assert_eq!(expected, real);
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, false)
.await
.unwrap();
let (col_name, lower, upper) = real;
let new_sql = if lower.is_some() {
let to_df_literal = |value| {
let value = Value::from(value);
value.try_to_scalar_value(&value.data_type()).unwrap()
};
let lower = to_df_literal(lower.unwrap());
let upper = to_df_literal(upper.unwrap());
let expr = col(&col_name)
.gt_eq(lit(lower))
.and(col(&col_name).lt_eq(lit(upper)));
let mut add_filter = AddFilterRewriter::new(expr);
let plan = plan.rewrite(&mut add_filter).unwrap().data;
df_plan_to_sql(&plan).unwrap()
} else {
sql.to_string()
};
assert_eq!(expected_unparsed, new_sql);
}
}
}

View File

@@ -1,815 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{BTreeMap, HashMap, HashSet};
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use api::v1::flow::FlowResponse;
use common_error::ext::BoxedError;
use common_meta::ddl::create_flow::FlowType;
use common_meta::key::flow::FlowMetadataManagerRef;
use common_meta::key::table_info::TableInfoManager;
use common_meta::key::TableMetadataManagerRef;
use common_telemetry::tracing::warn;
use common_telemetry::{debug, info};
use common_time::Timestamp;
use datafusion::sql::unparser::expr_to_sql;
use datafusion_common::tree_node::TreeNode;
use datatypes::value::Value;
use query::QueryEngineRef;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::RegionId;
use table::metadata::TableId;
use tokio::sync::oneshot::error::TryRecvError;
use tokio::sync::{oneshot, RwLock};
use tokio::time::Instant;
use super::frontend_client::FrontendClient;
use super::{df_plan_to_sql, AddFilterRewriter, TimeWindowExpr};
use crate::adapter::{CreateFlowArgs, FlowId, TableName};
use crate::error::{
DatafusionSnafu, DatatypesSnafu, ExternalSnafu, FlowAlreadyExistSnafu, InternalSnafu,
TimeSnafu, UnexpectedSnafu,
};
use crate::metrics::{METRIC_FLOW_RULE_ENGINE_QUERY_TIME, METRIC_FLOW_RULE_ENGINE_SLOW_QUERY};
use crate::recording_rules::{find_time_window_expr, sql_to_df_plan};
use crate::Error;
/// TODO(discord9): make those constants configurable
/// The default rule engine query timeout is 10 minutes
pub const DEFAULT_RULE_ENGINE_QUERY_TIMEOUT: Duration = Duration::from_secs(10 * 60);
/// will output a warn log for any query that runs for more than 1 minute, and also every 1 minute while that query is still running
pub const SLOW_QUERY_THRESHOLD: Duration = Duration::from_secs(60);
/// TODO(discord9): determine how to configure refresh rate
pub struct RecordingRuleEngine {
tasks: RwLock<BTreeMap<FlowId, RecordingRuleTask>>,
shutdown_txs: RwLock<BTreeMap<FlowId, oneshot::Sender<()>>>,
frontend_client: Arc<FrontendClient>,
flow_metadata_manager: FlowMetadataManagerRef,
table_meta: TableMetadataManagerRef,
engine: QueryEngineRef,
}
impl RecordingRuleEngine {
pub fn new(
frontend_client: Arc<FrontendClient>,
engine: QueryEngineRef,
flow_metadata_manager: FlowMetadataManagerRef,
table_meta: TableMetadataManagerRef,
) -> Self {
Self {
tasks: Default::default(),
shutdown_txs: Default::default(),
frontend_client,
flow_metadata_manager,
table_meta,
engine,
}
}
pub async fn handle_inserts(
&self,
request: api::v1::region::InsertRequests,
) -> Result<FlowResponse, Error> {
let table_info_mgr = self.table_meta.table_info_manager();
let mut group_by_table_name: HashMap<TableName, Vec<api::v1::Rows>> = HashMap::new();
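// Group the inserted rows by source table name; for every task whose source
// tables received new rows, evaluate the task's time window expr over those rows
// and mark the resulting time windows as dirty so the next tick re-computes them.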
for r in request.requests {
let tid = RegionId::from(r.region_id).table_id();
let name = get_table_name(table_info_mgr, &tid).await?;
let entry = group_by_table_name.entry(name).or_default();
if let Some(rows) = r.rows {
entry.push(rows);
}
}
for (_flow_id, task) in self.tasks.read().await.iter() {
let src_table_names = &task.source_table_names;
for src_table_name in src_table_names {
if let Some(entry) = group_by_table_name.get(src_table_name) {
let Some(expr) = &task.time_window_expr else {
continue;
};
let involved_time_windows = expr.handle_rows(entry.clone()).await?;
let mut state = task.state.write().await;
state
.dirty_time_windows
.add_lower_bounds(involved_time_windows.into_iter());
}
}
}
Ok(Default::default())
}
}
async fn get_table_name(zelf: &TableInfoManager, table_id: &TableId) -> Result<TableName, Error> {
zelf.get(*table_id)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
.with_context(|| UnexpectedSnafu {
reason: format!("Table id = {:?}, couldn't found table name", table_id),
})
.map(|name| name.table_name())
.map(|name| [name.catalog_name, name.schema_name, name.table_name])
}
const MIN_REFRESH_DURATION: Duration = Duration::new(5, 0);
impl RecordingRuleEngine {
pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
let CreateFlowArgs {
flow_id,
sink_table_name,
source_table_ids,
create_if_not_exists,
or_replace,
expire_after,
comment: _,
sql,
flow_options,
query_ctx,
} = args;
// or replace logic
{
let is_exist = self.tasks.read().await.contains_key(&flow_id);
match (create_if_not_exists, or_replace, is_exist) {
// if replace, ignore that old flow exists
(_, true, true) => {
info!("Replacing flow with id={}", flow_id);
}
(false, false, true) => FlowAlreadyExistSnafu { id: flow_id }.fail()?,
// already exists, and not replace, return None
(true, false, true) => {
info!("Flow with id={} already exists, do nothing", flow_id);
return Ok(None);
}
// continue as normal
(_, _, false) => (),
}
}
let flow_type = flow_options.get(FlowType::FLOW_TYPE_KEY);
ensure!(
flow_type == Some(&FlowType::RecordingRule.to_string()) || flow_type.is_none(),
UnexpectedSnafu {
reason: format!("Flow type is not RecordingRule nor None, got {flow_type:?}")
}
);
let Some(query_ctx) = query_ctx else {
UnexpectedSnafu {
reason: "Query context is None".to_string(),
}
.fail()?
};
let query_ctx = Arc::new(query_ctx);
let mut source_table_names = Vec::new();
for src_id in source_table_ids {
let table_name = self
.table_meta
.table_info_manager()
.get(src_id)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
.with_context(|| UnexpectedSnafu {
reason: format!("Table id = {:?}, couldn't found table name", src_id),
})
.map(|name| name.table_name())
.map(|name| [name.catalog_name, name.schema_name, name.table_name])?;
source_table_names.push(table_name);
}
let (tx, rx) = oneshot::channel();
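// `tx` is kept in `shutdown_txs` so `remove_flow` can signal shutdown; `rx` is
// handed to the background task. The SQL is planned here only to extract the
// time window expr; the per-tick query is re-planned with dirty-window filters.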
let plan = sql_to_df_plan(query_ctx.clone(), self.engine.clone(), &sql, true).await?;
let (column_name, time_window_expr, _, df_schema) = find_time_window_expr(
&plan,
self.engine.engine_state().catalog_manager().clone(),
query_ctx.clone(),
)
.await?;
let phy_expr = time_window_expr
.map(|expr| TimeWindowExpr::from_expr(&expr, &column_name, &df_schema))
.transpose()?;
info!("Flow id={}, found time window expr={:?}", flow_id, phy_expr);
let task = RecordingRuleTask::new(
flow_id,
&sql,
phy_expr,
expire_after,
sink_table_name,
source_table_names,
query_ctx,
rx,
);
let task_inner = task.clone();
let engine = self.engine.clone();
let frontend = self.frontend_client.clone();
// TODO(discord9): also save handle & use time wheel or what for better
let _handle = common_runtime::spawn_global(async move {
match task_inner.start_executing(engine, frontend).await {
Ok(()) => info!("Flow {} shutdown", task_inner.flow_id),
Err(err) => common_telemetry::error!(
"Flow {} encounter unrecoverable error: {err:?}",
task_inner.flow_id
),
}
});
// TODO(discord9): deal with replace logic
let replaced_old_task_opt = self.tasks.write().await.insert(flow_id, task);
drop(replaced_old_task_opt);
self.shutdown_txs.write().await.insert(flow_id, tx);
Ok(Some(flow_id))
}
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
if self.tasks.write().await.remove(&flow_id).is_none() {
warn!("Flow {flow_id} not found in tasks")
}
let Some(tx) = self.shutdown_txs.write().await.remove(&flow_id) else {
UnexpectedSnafu {
reason: format!("Can't found shutdown tx for flow {flow_id}"),
}
.fail()?
};
if tx.send(()).is_err() {
warn!("Fail to shutdown flow {flow_id} due to receiver already dropped, maybe flow {flow_id} is already dropped?")
}
Ok(())
}
}
#[derive(Debug, Clone)]
pub struct RecordingRuleTask {
pub flow_id: FlowId,
query: String,
pub time_window_expr: Option<TimeWindowExpr>,
/// in seconds
pub expire_after: Option<i64>,
sink_table_name: [String; 3],
source_table_names: HashSet<[String; 3]>,
state: Arc<RwLock<RecordingRuleState>>,
}
impl RecordingRuleTask {
#[allow(clippy::too_many_arguments)]
pub fn new(
flow_id: FlowId,
query: &str,
time_window_expr: Option<TimeWindowExpr>,
expire_after: Option<i64>,
sink_table_name: [String; 3],
source_table_names: Vec<[String; 3]>,
query_ctx: QueryContextRef,
shutdown_rx: oneshot::Receiver<()>,
) -> Self {
Self {
flow_id,
query: query.to_string(),
time_window_expr,
expire_after,
sink_table_name,
source_table_names: source_table_names.into_iter().collect(),
state: Arc::new(RwLock::new(RecordingRuleState::new(query_ctx, shutdown_rx))),
}
}
}
impl RecordingRuleTask {
/// This should be called in a new tokio task
pub async fn start_executing(
&self,
engine: QueryEngineRef,
frontend_client: Arc<FrontendClient>,
) -> Result<(), Error> {
// only the first query doesn't need an upper bound
let mut is_first = true;
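// Each iteration: build a time-window-constrained version of the flow query,
// wrap it into an `INSERT INTO <sink table> ...` statement, run it on a frontend
// node, record metrics and slow-query warnings, then sleep until the next tick
// (bounded below by MIN_REFRESH_DURATION) unless a shutdown signal arrives.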
loop {
// FIXME(discord9): test if need upper bound also works
let new_query = self.gen_query_with_time_window(engine.clone()).await?;
let insert_into = if let Some(new_query) = new_query {
format!(
"INSERT INTO {}.{}.{} {}",
self.sink_table_name[0],
self.sink_table_name[1],
self.sink_table_name[2],
new_query
)
} else {
tokio::time::sleep(MIN_REFRESH_DURATION).await;
continue;
};
if is_first {
is_first = false;
}
let instant = Instant::now();
let flow_id = self.flow_id;
let db_client = frontend_client.get_database_client().await?;
let peer_addr = db_client.peer.addr;
debug!(
"Executing flow {flow_id}(expire_after={:?} secs) on {:?} with query {}",
self.expire_after, peer_addr, &insert_into
);
let timer = METRIC_FLOW_RULE_ENGINE_QUERY_TIME
.with_label_values(&[flow_id.to_string().as_str()])
.start_timer();
let res = db_client.database.sql(&insert_into).await;
drop(timer);
let elapsed = instant.elapsed();
if let Ok(res1) = &res {
debug!(
"Flow {flow_id} executed, result: {res1:?}, elapsed: {:?}",
elapsed
);
} else if let Err(res) = &res {
warn!(
"Failed to execute Flow {flow_id} on frontend {}, result: {res:?}, elapsed: {:?} with query: {}",
peer_addr, elapsed, &insert_into
);
}
// record slow query
if elapsed >= SLOW_QUERY_THRESHOLD {
warn!(
"Flow {flow_id} on frontend {} executed for {:?} before complete, query: {}",
peer_addr, elapsed, &insert_into
);
METRIC_FLOW_RULE_ENGINE_SLOW_QUERY
.with_label_values(&[flow_id.to_string().as_str(), &insert_into, &peer_addr])
.observe(elapsed.as_secs_f64());
}
self.state
.write()
.await
.after_query_exec(elapsed, res.is_ok());
// drop the result to free client-related resources
drop(res);
let sleep_until = {
let mut state = self.state.write().await;
match state.shutdown_rx.try_recv() {
Ok(()) => break Ok(()),
Err(TryRecvError::Closed) => {
warn!("Unexpected shutdown flow {flow_id}, shutdown anyway");
break Ok(());
}
Err(TryRecvError::Empty) => (),
}
state.get_next_start_query_time(None)
};
tokio::time::sleep_until(sleep_until).await;
}
}
/// will merge and use the first ten time window in query
async fn gen_query_with_time_window(
&self,
engine: QueryEngineRef,
) -> Result<Option<String>, Error> {
let query_ctx = self.state.read().await.query_ctx.clone();
let start = SystemTime::now();
let since_the_epoch = start
.duration_since(UNIX_EPOCH)
.expect("Time went backwards");
let low_bound = self
.expire_after
.map(|e| since_the_epoch.as_secs() - e as u64)
.unwrap_or(u64::MIN);
let low_bound = Timestamp::new_second(low_bound as i64);
// TODO(discord9): use time window expr to get the precise expire lower bound
let expire_time_window_bound = self
.time_window_expr
.as_ref()
.map(|expr| expr.eval(low_bound))
.transpose()?;
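// `expire_time_window_bound` is the time window containing `now - expire_after`;
// dirty windows at or before its lower bound are considered expired. The window
// size (upper - lower) is also reused below when merging dirty time windows.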
let new_sql = {
let expr = {
match expire_time_window_bound {
Some((Some(l), Some(u))) => {
let window_size = u.sub(&l).with_context(|| UnexpectedSnafu {
reason: format!("Can't get window size from {u:?} - {l:?}"),
})?;
let col_name = self
.time_window_expr
.as_ref()
.map(|expr| expr.column_name.clone())
.with_context(|| UnexpectedSnafu {
reason: format!(
"Flow id={:?}, Failed to get column name from time window expr",
self.flow_id
),
})?;
self.state
.write()
.await
.dirty_time_windows
.gen_filter_exprs(&col_name, Some(l), window_size, self)?
}
_ => {
debug!(
"Flow id = {:?}, can't get window size: precise_lower_bound={expire_time_window_bound:?}, using the same query", self.flow_id
);
// since no time window lower/upper bound is found, just return the original query
return Ok(Some(self.query.clone()));
}
}
};
debug!(
"Flow id={:?}, Generated filter expr: {:?}",
self.flow_id,
expr.as_ref()
.map(|expr| expr_to_sql(expr).with_context(|_| DatafusionSnafu {
context: format!("Failed to generate filter expr from {expr:?}"),
}))
.transpose()?
.map(|s| s.to_string())
);
let Some(expr) = expr else {
// no new data, hence no need to update
debug!("Flow id={:?}, no new data, not update", self.flow_id);
return Ok(None);
};
let mut add_filter = AddFilterRewriter::new(expr);
// make a not optimized plan for clearer unparse
let plan =
sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.query, false).await?;
let plan = plan
.clone()
.rewrite(&mut add_filter)
.with_context(|_| DatafusionSnafu {
context: format!("Failed to rewrite plan {plan:?}"),
})?
.data;
df_plan_to_sql(&plan)?
};
Ok(Some(new_sql))
}
}
#[derive(Debug)]
pub struct RecordingRuleState {
query_ctx: QueryContextRef,
/// last query complete time
last_update_time: Instant,
/// last time query duration
last_query_duration: Duration,
/// Dirty Time windows need to be updated
/// mapping of `start -> end` and non-overlapping
dirty_time_windows: DirtyTimeWindows,
exec_state: ExecState,
shutdown_rx: oneshot::Receiver<()>,
}
#[derive(Debug, Clone, Default)]
pub struct DirtyTimeWindows {
windows: BTreeMap<Timestamp, Option<Timestamp>>,
}
fn to_df_literal(value: Timestamp) -> Result<datafusion_common::ScalarValue, Error> {
let value = Value::from(value);
let value = value
.try_to_scalar_value(&value.data_type())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to convert to scalar value: {}", value),
})?;
Ok(value)
}
impl DirtyTimeWindows {
/// Time window merge distance
const MERGE_DIST: i32 = 3;
/// Maximum number of filters allowed in a single query
const MAX_FILTER_NUM: usize = 20;
/// Add lower bounds to the dirty time windows. Upper bounds are ignored.
///
/// # Arguments
///
/// * `lower_bounds` - An iterator of lower bounds to be added.
pub fn add_lower_bounds(&mut self, lower_bounds: impl Iterator<Item = Timestamp>) {
for lower_bound in lower_bounds {
let entry = self.windows.entry(lower_bound);
entry.or_insert(None);
}
}
/// Generate all filter expressions consuming all time windows
pub fn gen_filter_exprs(
&mut self,
col_name: &str,
expire_lower_bound: Option<Timestamp>,
window_size: chrono::Duration,
task_ctx: &RecordingRuleTask,
) -> Result<Option<datafusion_expr::Expr>, Error> {
debug!(
"expire_lower_bound: {:?}, window_size: {:?}",
expire_lower_bound.map(|t| t.to_iso8601_string()),
window_size
);
self.merge_dirty_time_windows(window_size, expire_lower_bound)?;
if self.windows.len() > Self::MAX_FILTER_NUM {
let first_time_window = self.windows.first_key_value();
let last_time_window = self.windows.last_key_value();
warn!(
"Flow id = {:?}, too many time windows: {}, only the first {} are taken for this query, the group by expression might be wrong. Time window expr={:?}, expire_after={:?}, first_time_window={:?}, last_time_window={:?}, the original query: {:?}",
task_ctx.flow_id,
self.windows.len(),
Self::MAX_FILTER_NUM,
task_ctx.time_window_expr,
task_ctx.expire_after,
first_time_window,
last_time_window,
task_ctx.query
);
}
// get the first `MAX_FILTER_NUM` time windows
let nth = self
.windows
.iter()
.nth(Self::MAX_FILTER_NUM)
.map(|(key, _)| *key);
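// `split_off(&nth)` keeps everything at or after the (MAX_FILTER_NUM + 1)-th key;
// after the swap, `first_nth` holds at most MAX_FILTER_NUM windows to consume in
// this query, while the remainder stays in `self.windows` for a later run.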
let first_nth = {
if let Some(nth) = nth {
let mut after = self.windows.split_off(&nth);
std::mem::swap(&mut self.windows, &mut after);
after
} else {
std::mem::take(&mut self.windows)
}
};
let mut expr_lst = vec![];
for (start, end) in first_nth.into_iter() {
debug!(
"Time window start: {:?}, end: {:?}",
start.to_iso8601_string(),
end.map(|t| t.to_iso8601_string())
);
use datafusion_expr::{col, lit};
let lower = to_df_literal(start)?;
let upper = end.map(to_df_literal).transpose()?;
let expr = if let Some(upper) = upper {
col(col_name)
.gt_eq(lit(lower))
.and(col(col_name).lt(lit(upper)))
} else {
col(col_name).gt_eq(lit(lower))
};
expr_lst.push(expr);
}
let expr = expr_lst.into_iter().reduce(|a, b| a.or(b));
Ok(expr)
}
/// Merge time windows that overlaps or get too close
pub fn merge_dirty_time_windows(
&mut self,
window_size: chrono::Duration,
expire_lower_bound: Option<Timestamp>,
) -> Result<(), Error> {
let mut new_windows = BTreeMap::new();
let mut prev_tw = None;
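// Walk the windows in ascending order of start time: drop any window whose start
// is not newer than `expire_lower_bound`, and merge a window into the previous one
// when the gap between its start and the previous window's end is at most
// `window_size * MERGE_DIST`; otherwise flush the previous window and start a new one.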
for (lower_bound, upper_bound) in std::mem::take(&mut self.windows) {
// filter out expired time window
if let Some(expire_lower_bound) = expire_lower_bound {
if lower_bound <= expire_lower_bound {
continue;
}
}
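// The first surviving window only seeds `prev_tw`; merging starts from the second one.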
let Some(prev_tw) = &mut prev_tw else {
prev_tw = Some((lower_bound, upper_bound));
continue;
};
let std_window_size = window_size.to_std().map_err(|e| {
InternalSnafu {
reason: e.to_string(),
}
.build()
})?;
// if cur.lower - prev.upper <= window_size * MERGE_DIST, merge
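// e.g. with a 5-minute window size and MERGE_DIST = 3, two windows whose gap is
// at most 15 minutes are merged into one contiguous window.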
let prev_upper = prev_tw
.1
.unwrap_or(prev_tw.0.add_duration(std_window_size).context(TimeSnafu)?);
prev_tw.1 = Some(prev_upper);
let cur_upper = upper_bound.unwrap_or(
lower_bound
.add_duration(std_window_size)
.context(TimeSnafu)?,
);
if lower_bound
.sub(&prev_upper)
.map(|dist| dist <= window_size * Self::MERGE_DIST)
.unwrap_or(false)
{
prev_tw.1 = Some(cur_upper);
} else {
new_windows.insert(prev_tw.0, prev_tw.1);
*prev_tw = (lower_bound, Some(cur_upper));
}
}
if let Some(prev_tw) = prev_tw {
new_windows.insert(prev_tw.0, prev_tw.1);
}
self.windows = new_windows;
Ok(())
}
}
impl RecordingRuleState {
pub fn new(query_ctx: QueryContextRef, shutdown_rx: oneshot::Receiver<()>) -> Self {
Self {
query_ctx,
last_update_time: Instant::now(),
last_query_duration: Duration::from_secs(0),
dirty_time_windows: Default::default(),
exec_state: ExecState::Idle,
shutdown_rx,
}
}
/// Called after the last query is done.
/// `is_succ` indicates whether the last query succeeded.
pub fn after_query_exec(&mut self, elapsed: Duration, _is_succ: bool) {
self.exec_state = ExecState::Idle;
self.last_query_duration = elapsed;
self.last_update_time = Instant::now();
}
/// Time to start the next query: wait for the last query's duration (capped by `max_timeout` when given, and never less than `MIN_REFRESH_DURATION`) after the last query completed
pub fn get_next_start_query_time(&self, max_timeout: Option<Duration>) -> Instant {
let next_duration = max_timeout
.unwrap_or(self.last_query_duration)
.min(self.last_query_duration);
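// `next_duration` is effectively min(max_timeout, last_query_duration), clamped
// below by `MIN_REFRESH_DURATION` so the next query is never scheduled immediately.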
let next_duration = next_duration.max(MIN_REFRESH_DURATION);
self.last_update_time + next_duration
}
}
#[derive(Debug, Clone)]
enum ExecState {
Idle,
Executing,
}
#[cfg(test)]
mod test {
use pretty_assertions::assert_eq;
use super::*;
#[test]
fn test_merge_dirty_time_windows() {
let mut dirty = DirtyTimeWindows::default();
dirty.add_lower_bounds(
vec![
Timestamp::new_second(0),
Timestamp::new_second((1 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
]
.into_iter(),
);
dirty
.merge_dirty_time_windows(chrono::Duration::seconds(5 * 60), None)
.unwrap();
// just enough to merge
assert_eq!(
dirty.windows,
BTreeMap::from([(
Timestamp::new_second(0),
Some(Timestamp::new_second(
(2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
))
)])
);
// separate time window
let mut dirty = DirtyTimeWindows::default();
dirty.add_lower_bounds(
vec![
Timestamp::new_second(0),
Timestamp::new_second((2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
]
.into_iter(),
);
dirty
.merge_dirty_time_windows(chrono::Duration::seconds(5 * 60), None)
.unwrap();
// the gap is just past the merge distance, so the windows stay separate
assert_eq!(
BTreeMap::from([
(
Timestamp::new_second(0),
Some(Timestamp::new_second(5 * 60))
),
(
Timestamp::new_second((2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
Some(Timestamp::new_second(
(3 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
))
)
]),
dirty.windows
);
// overlapping
let mut dirty = DirtyTimeWindows::default();
dirty.add_lower_bounds(
vec![
Timestamp::new_second(0),
Timestamp::new_second((DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
]
.into_iter(),
);
dirty
.merge_dirty_time_windows(chrono::Duration::seconds(5 * 60), None)
.unwrap();
// close enough to be merged into a single window
assert_eq!(
BTreeMap::from([(
Timestamp::new_second(0),
Some(Timestamp::new_second(
(1 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
))
),]),
dirty.windows
);
// expired
let mut dirty = DirtyTimeWindows::default();
dirty.add_lower_bounds(
vec![
Timestamp::new_second(0),
Timestamp::new_second((DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
]
.into_iter(),
);
dirty
.merge_dirty_time_windows(
chrono::Duration::seconds(5 * 60),
Some(Timestamp::new_second(
(DirtyTimeWindows::MERGE_DIST as i64) * 6 * 60,
)),
)
.unwrap();
// both windows are at or before the expire lower bound, so nothing remains
assert_eq!(BTreeMap::from([]), dirty.windows);
}
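// An additional sketch, not part of the original change: duplicate lower bounds
// should collapse into a single dirty-window entry, since `DirtyTimeWindows` is
// keyed by window start.
#[test]
fn test_add_lower_bounds_dedup() {
let mut dirty = DirtyTimeWindows::default();
dirty.add_lower_bounds(
vec![
Timestamp::new_second(0),
Timestamp::new_second(0),
Timestamp::new_second(60),
]
.into_iter(),
);
assert_eq!(
dirty.windows,
BTreeMap::from([
(Timestamp::new_second(0), None),
(Timestamp::new_second(60), None)
])
);
}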
}
