Compare commits

...

79 Commits

Author SHA1 Message Date
WenyXu
38456638f8 chore: bump version to 0.17.1
Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-17 16:42:28 +08:00
Yingwen
97c0b1f5c1 chore: reduce SeriesScan sender timeout (#6983)
Signed-off-by: evenyag <realevenyag@gmail.com>
2025-09-17 16:42:28 +08:00
shuiyisong
4fc7f12360 fix: OTel metrics naming with Prometheus style (#6982)
* fix: otel metrics naming

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* fix: otel metrics naming & add some tests

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

---------

Signed-off-by: shuiyisong <xixing.sys@gmail.com>
2025-09-17 16:42:28 +08:00
dennis zhuang
ed17997449 test: migrate join tests from duckdb, part3 (#6881)
* test: migrate join tests

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* chore: update test results after rebasing main branch

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: unstable query sort results and natural_join test

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: count(*) with joining

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: unstable query sort results and style

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

---------

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
2025-09-17 16:42:28 +08:00
Lei, HUANG
849ae8ebb6 fix: avoid truncating SST statistics during flush (#6977)
fix/disable-parquet-stats-truncate:
 - **Update `memcomparable` Dependency**: Switched `memcomparable` from crates.io to a Git repository in `Cargo.lock` and `mito-codec/Cargo.toml`, and removed it from `mito2/Cargo.toml`.
 - **Enhance Parquet Writer Properties**: Added `set_statistics_truncate_length` and `set_column_index_truncate_length` to `WriterProperties` in `parquet.rs`, `bulk/part.rs`, `partition_tree/data.rs`, and `writer.rs`.
 - **Add Test for Corrupt Scan**: Introduced a new test module `scan_corrupt.rs` in `mito2/src/engine` to verify handling of corrupt data.
 - **Update Test Data**: Modified test data in `flush.rs` to reflect changes in file sizes and sequences.

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>
2025-09-17 16:42:28 +08:00
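A minimal sketch of the idea behind the flush fix above, assuming the Rust `parquet` crate's `WriterProperties` builder; the helper name is illustrative, not the actual GreptimeDB code:

```rust
use parquet::file::properties::WriterProperties;

// Hypothetical helper: passing `None` keeps full min/max statistics and
// column-index values instead of truncated prefixes, so encoded keys in
// SST statistics are not cut short during flush.
fn writer_props_without_truncation() -> WriterProperties {
    WriterProperties::builder()
        .set_statistics_truncate_length(None)
        .set_column_index_truncate_length(None)
        .build()
}
```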
Zhenchi
a0587e2e87 fix: clean intm ignore notfound (#6971)
* fix: clean intm ignore notfound

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-09-17 16:42:28 +08:00
discord9
1ed71169ac feat: support SubqueryAlias pushdown (#6963)
* wip enforce dist requirement rewriter

Signed-off-by: discord9 <discord9@163.com>

* feat: enforce dist req

Signed-off-by: discord9 <discord9@163.com>

* test: sqlness result

Signed-off-by: discord9 <discord9@163.com>

* fix: double projection

Signed-off-by: discord9 <discord9@163.com>

* test: fix sqlness

Signed-off-by: discord9 <discord9@163.com>

* refactor: per review

Signed-off-by: discord9 <discord9@163.com>

* docs: use btree map

Signed-off-by: discord9 <discord9@163.com>

* test: sqlness explain&comment

Signed-off-by: discord9 <discord9@163.com>

---------

Signed-off-by: discord9 <discord9@163.com>
2025-09-17 16:42:28 +08:00
shuiyisong
e62f0e2b64 fix: deadlock in dashmap (#6978)
* fix: deadlock in dashmap

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* Update src/frontend/src/instance.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: extract fast cache check and add test

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

---------

Signed-off-by: shuiyisong <xixing.sys@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2025-09-17 16:42:28 +08:00
Weny Xu
f92e753a34 feat: add postgres tls support for CLI (#6941)
* feat: add postgres tls support for cli

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: apply suggestions

Signed-off-by: WenyXu <wenymedia@gmail.com>

---------

Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-17 16:42:28 +08:00
Ruihang Xia
a22b016f90 feat: skip compaction on large files in append-only mode (#6838)
* feat: skip compaction on large files in append-only mode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* log ignored files

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* format

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* only ignore level 1 files

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* early exit

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-09-17 16:42:28 +08:00
shuiyisong
7a9fa99069 fix: shorten lock time (#6968) 2025-09-17 16:42:28 +08:00
ZonaHe
d808e7be7e feat: update dashboard to v0.11.4 (#6956)
Co-authored-by: sunchanglong <sunchanglong@users.noreply.github.com>
2025-09-17 16:42:28 +08:00
fys
8e22fcfd5c fix: correct jemalloc metrics (#6959)
The allocated and resident metrics were swapped in the set calls. This commit
fixes the issue by ensuring each metric receives its corresponding value.
2025-09-17 16:42:28 +08:00
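A hedged sketch of the swap described above, with placeholder gauge and parameter names rather than the real GreptimeDB metrics:

```rust
use prometheus::IntGauge;

// Hypothetical fix: each jemalloc statistic is written to its own gauge.
// Previously the two `set` calls received each other's value.
fn report_jemalloc_stats(
    allocated: u64,
    resident: u64,
    allocated_gauge: &IntGauge,
    resident_gauge: &IntGauge,
) {
    allocated_gauge.set(allocated as i64);
    resident_gauge.set(resident as i64);
}
```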
zyy17
26729c31a6 fix: use pull_request_target to fix add labels 403 error (#6958)
Signed-off-by: zyy17 <zyylsxm@gmail.com>
2025-09-17 16:42:28 +08:00
discord9
b73617eaba feat(query): better alias tracker (#6909)
* better resolve

Signed-off-by: discord9 <discord9@163.com>

feat: layered alias tracker

Signed-off-by: discord9 <discord9@163.com>

refactor

Signed-off-by: discord9 <discord9@163.com>

docs: explain why there is no offset by one

Signed-off-by: discord9 <discord9@163.com>

test: more

Signed-off-by: discord9 <discord9@163.com>

simplify api

Signed-off-by: discord9 <discord9@163.com>

wip

Signed-off-by: discord9 <discord9@163.com>

fix: filter non-exist columns

Signed-off-by: discord9 <discord9@163.com>

feat: stuff

Signed-off-by: discord9 <discord9@163.com>

feat: cache partition columns

Signed-off-by: discord9 <discord9@163.com>

refactor: rm unused fn

Signed-off-by: discord9 <discord9@163.com>

no need res

Signed-off-by: discord9 <discord9@163.com>

chore: rm unwrap&docs update

Signed-off-by: discord9 <discord9@163.com>

* chore: after rebase fix

Signed-off-by: discord9 <discord9@163.com>

* refactor: per review

Signed-off-by: discord9 <discord9@163.com>

* fix: unsupport part

Signed-off-by: discord9 <discord9@163.com>

* err msg

Signed-off-by: discord9 <discord9@163.com>

* fix: pass correct partition cols

Signed-off-by: discord9 <discord9@163.com>

* fix? use column name only

Signed-off-by: discord9 <discord9@163.com>

* fix: merge scan has partition columns no alias/no partition diff

Signed-off-by: discord9 <discord9@163.com>

* refactor: loop instead of recursive

Signed-off-by: discord9 <discord9@163.com>

* refactor: per review

Signed-off-by: discord9 <discord9@163.com>

* feat: overlaps

Signed-off-by: discord9 <discord9@163.com>

---------

Signed-off-by: discord9 <discord9@163.com>
2025-09-17 16:42:28 +08:00
discord9
3b909f63e3 fix: count(1) instead of count(ts) when >1 inputs (#6952)
Signed-off-by: discord9 <discord9@163.com>
2025-09-17 16:42:28 +08:00
dennis zhuang
0d4e07eddd fix: unstable query sort results (#6944)
Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
2025-09-17 16:42:28 +08:00
dennis zhuang
b94ce9019d test: migrate duckdb tests, part 1 (#6870)
* test: migrate duckdb tests

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: style

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* test: add more duckdb tests

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: stable order

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* chore: simplify comments

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* chore: remove tests/cases/standalone/common/DUCKDB_MIGRATION_GUIDE.md

* fix: incorrect_sql.sql

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: integer flow test

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: integer flow test

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* docs: add todo

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

---------

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
2025-09-17 16:42:28 +08:00
Cason Kervis
3dcd40c4ba fix(path): fix program lookup failure on Windows CI (#6946)
* fix(path): fix program lookup failure on Windows CI

Signed-off-by: Cason Kervis <cscnk52@outlook.com>

* fix(path): fix program exec name

Signed-off-by: Cason Kervis <cscnk52@outlook.com>

* fix(path): using absolute path

Signed-off-by: Cason Kervis <cscnk52@outlook.com>

* style: using fmt

Signed-off-by: Cason Kervis <cscnk52@outlook.com>

---------

Signed-off-by: Cason Kervis <cscnk52@outlook.com>
2025-09-17 16:42:28 +08:00
Ruihang Xia
a67803d0e9 fix: handle hash distribution properly (#6943)
* fix: handle hash distribution properly

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/query/src/optimizer/pass_distribution.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: dennis zhuang <killme2008@gmail.com>
2025-09-17 16:42:28 +08:00
Ruihang Xia
aa7e7942f8 fix: wrap tql cte in a subquery alias (#6910)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-09-17 16:42:28 +08:00
dennis zhuang
f1b7581dc3 test: adds approx_percentile_cont to range query test (#6903)
Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
2025-09-17 16:42:28 +08:00
dennis zhuang
cd761df369 test: migrate duckdb tests part2, window functions (#6875)
* test: migrate window tests

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: blank line at the end

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

---------

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
2025-09-17 16:42:28 +08:00
liyang
0cea6ae64d fix: fix deploy greptimedb in sqlness-test (#6894)
Signed-off-by: liyang <daviderli614@gmail.com>
2025-09-17 16:42:28 +08:00
Weny Xu
8bf772fb50 chore: disable stats persistence by default (#6900)
* chore: disable stats persistence by default

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: apply suggestions

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: apply suggestions

Signed-off-by: WenyXu <wenymedia@gmail.com>

* fix: fix clippy

Signed-off-by: WenyXu <wenymedia@gmail.com>

---------

Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-03 21:25:54 +08:00
discord9
9c1240921d feat: flow full aggr only trigger on new data (#6880)
* fix: flow full aggr only trigger on new data

Signed-off-by: discord9 <discord9@163.com>

* chore: better debug msg

Signed-off-by: discord9 <discord9@163.com>

* refactor: per review

Signed-off-by: discord9 <discord9@163.com>

---------

Signed-off-by: discord9 <discord9@163.com>
Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-03 18:15:17 +08:00
Zhenchi
eb52129a91 fix: move prune_region_dir to region drop (#6891)
* fix: move prune_region_dir to region drop

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-03 15:50:43 +08:00
Weny Xu
a0a2b40cbe fix: initialize remote WAL regions with correct flushed entry IDs (#6856)
* fix: initialize remote WAL regions with correct flushed entry IDs

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: add logs

Signed-off-by: WenyXu <wenymedia@gmail.com>

* fix: correct latest offset

Signed-off-by: WenyXu <wenymedia@gmail.com>

* fix: update sqlness

Signed-off-by: WenyXu <wenymedia@gmail.com>

* feat: add replay checkpoint to catchup request

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: apply suggestions

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: logs

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: apply suggestions

Signed-off-by: WenyXu <wenymedia@gmail.com>

---------

Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-03 15:50:43 +08:00
Weny Xu
067c4458d6 chore: fix typo (#6887)
Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-03 15:50:43 +08:00
Ruihang Xia
4e9c31bf5c chore: fix typo (#6885)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-03 15:50:43 +08:00
Weny Xu
9320a6ddaa chore: update dashboard (#6883)
* chore: update dashboard

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: update json

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: update json

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: add desc

Signed-off-by: WenyXu <wenymedia@gmail.com>

---------

Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-03 15:50:43 +08:00
Zhenchi
4c9fcb7dee fix: prune intermediate dirs on index finish and region purge (#6878)
* fix: prune intermediate dirs on index finish and region purge

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-03 15:50:43 +08:00
Weny Xu
9dc16772fe fix: ignore reserved column IDs and prevent panic when chunk_size is zero (#6882)
Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-03 15:50:43 +08:00
discord9
6ee91f6af4 fix(flow): promql auto create table (#6867)
* fix: non aggr prom ql auto create table

Signed-off-by: discord9 <discord9@163.com>

* feat: val column use any name

Signed-off-by: discord9 <discord9@163.com>

* feat: check if it's tql src table

Signed-off-by: discord9 <discord9@163.com>

* test: check sink table is tql-able

Signed-off-by: discord9 <discord9@163.com>

* test: sqlness redacted

Signed-off-by: discord9 <discord9@163.com>

* fix: sql also handle no aggr case

Signed-off-by: discord9 <discord9@163.com>

---------

Signed-off-by: discord9 <discord9@163.com>
Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-03 15:50:43 +08:00
Zhenchi
9175fa643d feat: add schema and recordbatch builder for sst entry (#6841)
* feat: add schema and recordbatch builder for sst entry

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* add build plan

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-09-01 12:46:49 +00:00
Weny Xu
0e962844ac fix: fix incorrect timestamp precision in information_schema.tables (#6872)
* fix: fix incorrect timestamp precision in information_schema.tables

Signed-off-by: WenyXu <wenymedia@gmail.com>

* fix: fix unit test

Signed-off-by: WenyXu <wenymedia@gmail.com>

---------

Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-01 12:20:23 +00:00
Weny Xu
246b832d79 fix: use configured kv_client in etcd multi-transaction operations (#6871)
Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-09-01 11:13:48 +00:00
ZonaHe
e62a022d76 feat: update dashboard to v0.11.3 (#6864)
Co-authored-by: sunchanglong <sunchanglong@users.noreply.github.com>
2025-09-01 11:08:35 +00:00
liyang
e595885dc6 chore: use greptime dockerhub image (#6865)
Signed-off-by: liyang <daviderli614@gmail.com>
2025-09-01 11:07:26 +00:00
Lei, HUANG
dd3432e6ca chore: change encode raw values signature (#6869)
* chore/change-encode-raw-values-sig:
 ### Update Sparse Encoding to Use Byte Slices

 - **`bench_sparse_encoding.rs`**: Modified the `encode_raw_tag_value` function to use byte slices instead of `Bytes` for tag values.
 - **`sparse.rs`**: Updated the `encode_raw_tag_value` method in `SparsePrimaryKeyCodec` to accept byte slices (`&[u8]`) instead of `Bytes`. Adjusted related test cases to reflect this change.

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>

* chore/change-encode-raw-values-sig:
 ### Add `Clear` Trait Implementation for Byte Slices

 - Implemented the `Clear` trait for byte slices (`&[u8]`) in `repeated_field.rs` to enhance trait coverage and provide a default clear operation for byte slice types.

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>

---------

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>
2025-09-01 09:53:08 +00:00
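A rough before/after sketch of the signature change above; `SparseCodec` and its body are placeholders, not the real `SparsePrimaryKeyCodec`:

```rust
struct SparseCodec;

impl SparseCodec {
    // Old shape (for comparison): taking ownership forced callers to build a `Bytes`.
    // fn encode_raw_tag_value(&self, value: bytes::Bytes, out: &mut Vec<u8>)

    /// New shape: borrowing a byte slice lets callers pass any `&[u8]` directly.
    fn encode_raw_tag_value(&self, value: &[u8], out: &mut Vec<u8>) {
        out.extend_from_slice(value);
    }
}
```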
Yingwen
ab96703d8f chore: enlarge max file limit to 384 (#6868)
chore: enlarge max concurrent files to 384

Signed-off-by: evenyag <realevenyag@gmail.com>
2025-09-01 09:26:20 +00:00
github-actions[bot]
73add808a6 ci: update dev-builder image tag (#6858)
Signed-off-by: greptimedb-ci <greptimedb-ci@greptime.com>
Co-authored-by: greptimedb-ci <greptimedb-ci@greptime.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2025-09-01 08:44:43 +00:00
dennis zhuang
1234911ed3 refactor: query config options (#6781)
* feat: refactor columnar and vector conversion

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* feat: initialize config options from query context

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: failure tests

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* chore: revert ColumnarValue::try_from_vector

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

---------

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
2025-09-01 07:00:26 +00:00
LFC
d57c0db9e6 fix: no need to early lookup DNS for kafka broker (#6845)
Signed-off-by: luofucong <luofc@foxmail.com>
2025-09-01 03:03:33 +00:00
Ning Sun
4daf5adce5 feat: update rate limiter to use semaphore that will block without re… (#6853)
* feat: update rate limiter to use semaphore that will block without returning an error

Signed-off-by: Ning Sun <sunning@greptime.com>

* fix: remove unused error

Signed-off-by: Ning Sun <sunning@greptime.com>

---------

Signed-off-by: Ning Sun <sunning@greptime.com>
2025-08-30 18:17:08 +00:00
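A minimal sketch of the blocking behavior described above, assuming `tokio::sync::Semaphore`; the struct name and API shape are illustrative, not the actual GreptimeDB limiter:

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

// Hypothetical limiter: acquiring a permit waits when the limit is reached
// instead of returning a rate-limit error to the caller.
struct BlockingLimiter {
    permits: Arc<Semaphore>,
}

impl BlockingLimiter {
    fn new(max_in_flight: usize) -> Self {
        Self { permits: Arc::new(Semaphore::new(max_in_flight)) }
    }

    async fn run<F, T>(&self, task: F) -> T
    where
        F: std::future::Future<Output = T>,
    {
        // Waits for a free permit; no error path for "too many requests".
        let _permit = self.permits.acquire().await.expect("semaphore closed");
        task.await
    }
}
```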
Yingwen
575093f85f feat: Support more key types for the DictionaryVector (#6855)
* feat: support different key type for the dictionary vector

Signed-off-by: evenyag <realevenyag@gmail.com>

* feat: support more dictionary type in try_into_vector

Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: use key array's type as key type

Signed-off-by: evenyag <realevenyag@gmail.com>

---------

Signed-off-by: evenyag <realevenyag@gmail.com>
2025-08-29 13:23:25 +00:00
jeremyhi
ac82ad4549 feat: make etcd store max codec size configurable (#6859)
* feat: make etcd store max codec size configurable

* feat: only decoding limit
2025-08-29 12:21:59 +00:00
discord9
367a25af06 feat: flow prom ql auto sink table is also promql-able (#6852)
* feat: flow prom ql auto sink table is also promql-able

Signed-off-by: discord9 <discord9@163.com>

* fix: gen create table expr without aggr/projection outermost

Signed-off-by: discord9 <discord9@163.com>

* test: update non-aggr testcase

Signed-off-by: discord9 <discord9@163.com>

---------

Signed-off-by: discord9 <discord9@163.com>
2025-08-29 12:16:03 +00:00
zyy17
d585c23ba5 refactor: add stop methods for LocalFilePurger and CompactionRegion (#6848)
* refactor: add `LocalFilePurger::stop(&self)` and `stop_file_purger()` of `CompactionRegion`

Signed-off-by: zyy17 <zyylsxm@gmail.com>

* chore: rename methods

Signed-off-by: zyy17 <zyylsxm@gmail.com>

---------

Signed-off-by: zyy17 <zyylsxm@gmail.com>
2025-08-29 09:23:59 +00:00
LFC
f55023f300 ci: install ssh for Android dev-builder (#6854)
Signed-off-by: luofucong <luofc@foxmail.com>
2025-08-29 08:42:35 +00:00
Weny Xu
9213315613 chore: add server-side error logging to improve observability in gRPC (#6846)
chore: print tonic code

Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-08-29 07:47:24 +00:00
zyy17
e77ad8e9dc chore: run pull-test-deps-images.sh before docker compose to avoid rate limit (#6851)
chore: run `pull-test-deps-images.sh` before docker compose

Signed-off-by: zyy17 <zyylsxm@gmail.com>
2025-08-29 05:03:45 +00:00
liyang
7bc669e991 chore: update bitnami config (#6847)
* chore: update bitnami config

Signed-off-by: liyang <daviderli614@gmail.com>

* update postgresql chart version

Signed-off-by: liyang <daviderli614@gmail.com>

* fix ci

Signed-off-by: liyang <daviderli614@gmail.com>

* refactor: add pull-test-deps-images.sh to pull images one by one to avoid rate limit

Signed-off-by: zyy17 <zyylsxm@gmail.com>

---------

Signed-off-by: liyang <daviderli614@gmail.com>
Signed-off-by: zyy17 <zyylsxm@gmail.com>
Co-authored-by: zyy17 <zyylsxm@gmail.com>
2025-08-29 02:45:14 +00:00
ZonaHe
b84cd19145 feat: update dashboard to v0.11.2 (#6843)
Co-authored-by: ZonaHex <ZonaHex@users.noreply.github.com>
2025-08-28 10:59:41 +00:00
Logic
cbcfdf9d65 feat: add optional schema for Postgres metadata tables (#6764)
* feat(meta): add optional schema for Postgres metadata tables

- Add `schema` option to specify a custom schema for metadata tables
- Update `PgStore` and `PgElection` to support optional schema
- Modify SQL templates to use schema when provided
- Add tests for schema support in Postgres backend

Signed-off-by: Logic <zqr10159@dromara.org>

* refactor(meta): remove unused `create_schema_statement` and simplify `PgSqlTemplateFactory`

- Remove `create_schema_statement` from `PgSqlTemplateSet` struct
- Simplify `PgSqlTemplateFactory` by removing `new` method and merging it with `with_schema`
- Update related tests to reflect these changes

Signed-off-by: Logic <zqr10159@dromara.org>

* refactor(meta-srv): remove unused imports

- Remove unused import of BoxedError from common_error::ext
- Remove unused import of TlsOption from servers::tls

Signed-off-by: Logic <zqr10159@dromara.org>

* build(meta): update Postgres version and add error handling imports

- Update Postgres version to 17 in docker-compose.yml
- Add BoxedError import for error handling in meta-srv

Signed-off-by: Logic <zqr10159@dromara.org>

* feat(postgres): add support for optional schema in PgElection and related components

Signed-off-by: Logic <zqr10159@dromara.org>

* feat(postgres): add support for optional schema in PgElection and related components

Signed-off-by: Logic <zqr10159@dromara.org>

* fix(develop): update Postgres schema commands to specify host

Signed-off-by: Logic <zqr10159@dromara.org>

* refactor(postgres): simplify plugin options handling and update SQL examples

Signed-off-by: Logic <zqr10159@dromara.org>

* refactor(postgres): simplify plugin options handling and update SQL examples

Signed-off-by: Logic <zqr10159@dromara.org>

* fix(postgres): update meta_election_lock_id description for optional schema support

Signed-off-by: Logic <zqr10159@dromara.org>

* fix(postgres): add health check and fallback wait for Postgres in CI setup

* fix(postgres): update Docker setup for Postgres and add support for Postgres 15

* fix(postgres): remove redundant Postgres setup step in CI configuration

* Update tests-integration/fixtures/postgres/init.sql

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* Update .github/workflows/develop.yml

* Update tests-integration/fixtures/docker-compose.yml

* Update src/common/meta/src/kv_backend/rds/postgres.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* Update src/common/meta/src/kv_backend/rds/postgres.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* Update src/common/meta/src/kv_backend/rds/postgres.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* Update src/common/meta/src/kv_backend/rds/postgres.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* fix: Refactor PostgreSQL backend to support optional schema in PgStore and related SQL templates

* feat: Update PostgreSQL configuration and add PG15 specific integration tests

* feat: Update PostgreSQL configuration and add PG15 specific integration tests

* refactor(postgres): update test schemas from 'greptime_schema' to 'test_schema'

* Update .github/workflows/develop.yml

* refactor: minor factor

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: apply suggestions

Signed-off-by: WenyXu <wenymedia@gmail.com>

* fix: fix unit test

Signed-off-by: WenyXu <wenymedia@gmail.com>

---------

Signed-off-by: Logic <zqr10159@dromara.org>
Signed-off-by: WenyXu <wenymedia@gmail.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2025-08-28 09:24:14 +00:00
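A hedged sketch of the schema-qualification idea above; `SqlTemplates` and the SQL text are illustrative, not the actual `PgSqlTemplateSet`:

```rust
// Hypothetical template set: when an optional schema is configured, metadata
// table references are prefixed with it, otherwise the bare table name is used.
struct SqlTemplates {
    table_ref: String,
}

impl SqlTemplates {
    fn with_schema(table: &str, schema: Option<&str>) -> Self {
        let table_ref = match schema {
            Some(s) => format!("\"{s}\".\"{table}\""),
            None => format!("\"{table}\""),
        };
        Self { table_ref }
    }

    fn point_get_sql(&self) -> String {
        format!("SELECT k, v FROM {} WHERE k = $1", self.table_ref)
    }
}
```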
discord9
bacd9c7d15 feat: add event ts to region manifest (#6751)
* feat: add event ts to region manifest

Signed-off-by: discord9 <discord9@163.com>

delete files

Signed-off-by: discord9 <discord9@163.com>

chore: clippy

Signed-off-by: discord9 <discord9@163.com>

lower concurrency

Signed-off-by: discord9 <discord9@163.com>

feat: gc use delta manifest to get expel time

Signed-off-by: discord9 <discord9@163.com>

docs: terminology

Signed-off-by: discord9 <discord9@163.com>

refactor: some advices from review

Signed-off-by: discord9 <discord9@163.com>

feat: manifest add removed files field

Signed-off-by: discord9 <discord9@163.com>

feat(WIP): add remove time in manifest

Signed-off-by: discord9 <discord9@163.com>

wip: more config

Signed-off-by: discord9 <discord9@163.com>

feat: manifest update removed files

Signed-off-by: discord9 <discord9@163.com>

test: add remove file opts field

Signed-off-by: discord9 <discord9@163.com>

test: fix test

Signed-off-by: discord9 <discord9@163.com>

chore: delete gc.rs

Signed-off-by: discord9 <discord9@163.com>

* feat: proper option name

Signed-off-by: discord9 <discord9@163.com>

* refactor: per review

Signed-off-by: discord9 <discord9@163.com>

* test: update manifest size

Signed-off-by: discord9 <discord9@163.com>

* test: fix eq

Signed-off-by: discord9 <discord9@163.com>

* refactor: some per review

Signed-off-by: discord9 <discord9@163.com>

* refactor: per review

Signed-off-by: discord9 <discord9@163.com>

* refactor: more per review

Signed-off-by: discord9 <discord9@163.com>

* test: update manifest size

Signed-off-by: discord9 <discord9@163.com>

* test: update config

Signed-off-by: discord9 <discord9@163.com>

* feat: keep count 0 means only use ttl

Signed-off-by: discord9 <discord9@163.com>

---------

Signed-off-by: discord9 <discord9@163.com>
2025-08-28 09:08:18 +00:00
Weny Xu
f441598247 fix: correct config doc (#6836)
fix: fix typo

Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-08-28 06:26:25 +00:00
Weny Xu
556c408e7b feat: rename region_statistics to region_statistics_history (#6837)
Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-08-28 06:14:20 +00:00
shuiyisong
ec817f6877 fix: gRPC auth (#6827)
* fix: internal service

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* refactor: gRPC auth

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* chore: add permission check for bulk ingest

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* chore: remove unused grpc auth middleware

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* chore: extract header function

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* refactor: extract common code and add auth to otel arrow api

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* chore: rename utils to context_auth

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* test: otel arrow auth

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* chore: add support for old auth value

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

---------

Signed-off-by: shuiyisong <xixing.sys@gmail.com>
2025-08-28 04:00:45 +00:00
Yingwen
32e73dad12 fix: use actual buf size as cache page value size (#6829)
* feat: cache the cloned page bytes

Signed-off-by: evenyag <realevenyag@gmail.com>

* feat: cache the whole row group pages

The opendal reader may merge IO requests so the pages of different
columns can share the same Bytes.
When we use a per-column page cache, the page cache may still reference
the whole Bytes after eviction if there are other columns in the cache that
share the same Bytes.

Signed-off-by: evenyag <realevenyag@gmail.com>

* feat: check possible max byte range and copy pages if needed

Signed-off-by: evenyag <realevenyag@gmail.com>

* feat: always copy pages

Signed-off-by: evenyag <realevenyag@gmail.com>

* feat: returns the copied pages

Signed-off-by: evenyag <realevenyag@gmail.com>

* feat: compute cache size by MERGE_GAP

Signed-off-by: evenyag <realevenyag@gmail.com>

* feat: align to buf size

Signed-off-by: evenyag <realevenyag@gmail.com>

* feat: align to 2MB

Signed-off-by: evenyag <realevenyag@gmail.com>

* chore: remove unused code

Signed-off-by: evenyag <realevenyag@gmail.com>

* style: fix clippy

Signed-off-by: evenyag <realevenyag@gmail.com>

* chore: fix typo

Signed-off-by: evenyag <realevenyag@gmail.com>

* test: fix parquet read with cache test

Signed-off-by: evenyag <realevenyag@gmail.com>

---------

Signed-off-by: evenyag <realevenyag@gmail.com>
2025-08-28 03:37:11 +00:00
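A minimal sketch of the copy-before-cache idea explained above, assuming the `bytes` crate; the helper is illustrative, not the actual mito2 cache code:

```rust
use bytes::Bytes;

// Hypothetical helper: copy a page out of a (possibly merged) read buffer
// before caching it, so the cached entry no longer pins the whole shared
// allocation and its weight can reflect the bytes it actually owns.
fn to_cached_page(shared_buf: &Bytes, offset: usize, len: usize) -> Bytes {
    // `shared_buf.slice(..)` would keep sharing the underlying allocation;
    // copying detaches the page so eviction frees real memory.
    Bytes::copy_from_slice(&shared_buf[offset..offset + len])
}
```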
Sicong Hu
bc20b17bc5 docs(rfc): async index build (#6757)
* docs(rfc): async index build

Signed-off-by: SNC123 <sinhco@outlook.com>

* update rfc for retaining sync build

Signed-off-by: SNC123 <sinhco@outlook.com>

* fix bug and update rfc for index resource management

Signed-off-by: SNC123 <sinhco@outlook.com>

* update rfc for manual rebuild

Signed-off-by: SNC123 <sinhco@outlook.com>

---------

Signed-off-by: SNC123 <sinhco@outlook.com>
2025-08-28 02:32:44 +00:00
Weny Xu
200422313f refactor(meta): refactor admin service to use modern axum handlers (#6833)
* refactor(meta): refactor admin service to use modern axum handlers

Signed-off-by: WenyXu <wenymedia@gmail.com>

* Update src/meta-srv/src/service/admin/health.rs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Signed-off-by: WenyXu <wenymedia@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-08-27 10:47:11 +00:00
discord9
8452a9d579 feat(flow): add eval interval option (#6623)
* feat: add flow eval interval

Signed-off-by: discord9 <discord9@163.com>

* feat: tql flow must have eval interval

Signed-off-by: discord9 <discord9@163.com>

* chore: clippy

Signed-off-by: discord9 <discord9@163.com>

* test: update sqlness

Signed-off-by: discord9 <discord9@163.com>

* wip

Signed-off-by: discord9 <discord9@163.com>

* wip

Signed-off-by: discord9 <discord9@163.com>

* feat: check for now func

Signed-off-by: discord9 <discord9@163.com>

* refactor: use ms instead

Signed-off-by: discord9 <discord9@163.com>

* fix: not panic&proper simplifier

Signed-off-by: discord9 <discord9@163.com>

* test: update to fix

Signed-off-by: discord9 <discord9@163.com>

* feat: not allow month in interval

Signed-off-by: discord9 <discord9@163.com>

* test: update to remove months

Signed-off-by: discord9 <discord9@163.com>

* refactor: per review

Signed-off-by: discord9 <discord9@163.com>

* chore: after rebase fix

Signed-off-by: discord9 <discord9@163.com>

* feat: use seconds and add to field instead

Signed-off-by: discord9 <discord9@163.com>

* chore: aft rebase fix

Signed-off-by: discord9 <discord9@163.com>

* fix: add check for month

Signed-off-by: discord9 <discord9@163.com>

* chore: fmt

Signed-off-by: discord9 <discord9@163.com>

* refactor: per review

Signed-off-by: discord9 <discord9@163.com>

* refactor: rm clone per review

Signed-off-by: discord9 <discord9@163.com>

* chore: update proto

Signed-off-by: discord9 <discord9@163.com>

---------

Signed-off-by: discord9 <discord9@163.com>
2025-08-27 09:44:32 +00:00
discord9
5ef4dd1743 docs: add internal grpc ports (#6815)
* docs: add internal grpc ports

Signed-off-by: discord9 <discord9@163.com>

* fix: update example toml

Signed-off-by: discord9 <discord9@163.com>

* fix: grpc option use default for missing field

Signed-off-by: discord9 <discord9@163.com>

---------

Signed-off-by: discord9 <discord9@163.com>
2025-08-27 08:20:27 +00:00
Yan Tingwang
32a3ef36f9 feat(metasrv): support tls for etcd client (#6818)
* add TLS support for etcd client connections~

Signed-off-by: codephage2020 <tingwangyan2020@163.com>

* locate correct certs

Signed-off-by: codephage2020 <tingwangyan2020@163.com>

* Updated certs

Signed-off-by: codephage2020 <tingwangyan2020@163.com>

* Updated CI

Signed-off-by: codephage2020 <tingwangyan2020@163.com>

* Updated CI

Signed-off-by: codephage2020 <tingwangyan2020@163.com>

* Update docker-compose.yml

* tests for TLS client creation

Signed-off-by: codephage2020 <tingwangyan2020@163.com>

* modify tests

Signed-off-by: codephage2020 <tingwangyan2020@163.com>

---------

Signed-off-by: codephage2020 <tingwangyan2020@163.com>
2025-08-27 07:41:05 +00:00
Weny Xu
566a647ec7 feat: add replay checkpoint to reduce overhead for remote WAL (#6816)
* feat: introduce `TopicRegionValue`

Signed-off-by: WenyXu <wenymedia@gmail.com>

* feat: persist region replay checkpoint

Signed-off-by: WenyXu <wenymedia@gmail.com>

* feat: introduce checkpoint

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: update config.md

Signed-off-by: WenyXu <wenymedia@gmail.com>

* refactor: minor refactor

Signed-off-by: WenyXu <wenymedia@gmail.com>

* feat: send open region instructions with replay checkpoint

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: use usize

Signed-off-by: WenyXu <wenymedia@gmail.com>

* fix: fix unit tests

Signed-off-by: WenyXu <wenymedia@gmail.com>

* fix: fix unit tests

Signed-off-by: WenyXu <wenymedia@gmail.com>

* feat: add topic name pattern

Signed-off-by: WenyXu <wenymedia@gmail.com>

* feat: enable wal prune by default

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: apply suggestions

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: apply suggestions

Signed-off-by: WenyXu <wenymedia@gmail.com>

---------

Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-08-27 07:24:33 +00:00
Yingwen
906e1ca0bf feat: functions and structs to scan flat format file and mem ranges (#6817)
* feat: implement function to scan flat memtable ranges

Signed-off-by: evenyag <realevenyag@gmail.com>

* feat: implement function to scan flat file ranges

Signed-off-by: evenyag <realevenyag@gmail.com>

* chore: compat batch in scan file range

Signed-off-by: evenyag <realevenyag@gmail.com>

* chore: scan other ranges

Signed-off-by: evenyag <realevenyag@gmail.com>

* chore: fix compiler errors

Signed-off-by: evenyag <realevenyag@gmail.com>

---------

Signed-off-by: evenyag <realevenyag@gmail.com>
2025-08-27 06:31:30 +00:00
Weny Xu
b921e41abf chore: remove unused deps (#6828)
Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-08-26 12:29:17 +00:00
Weny Xu
6782bcddfa fix: prevent stale physical table route during procedure retries (#6825)
fix(meta): prevent stale physical table route during procedure retries

Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-08-26 12:14:06 +00:00
Weny Xu
3d1a4b56a4 feat: add support for TWCS time window hints in insert operations (#6823)
* feat: Add support for TWCS time window hints in insert operations

Signed-off-by: WenyXu <wenymedia@gmail.com>

* feat: set system events table time window to 1d

Signed-off-by: WenyXu <wenymedia@gmail.com>

---------

Signed-off-by: WenyXu <wenymedia@gmail.com>
2025-08-26 10:52:00 +00:00
Arshdeep
8894cb5406 feat: resolve unused dependencies with cargo-udeps (#6578) (#6619)
* feat: resolve unused dependencies with cargo-udeps (#6578)

Signed-off-by: Arshdeep54 <balarsh535@gmail.com>

* Apply suggestion from @zyy17

Co-authored-by: zyy17 <zyylsxm@gmail.com>

* Apply suggestion from @zyy17

Co-authored-by: zyy17 <zyylsxm@gmail.com>

---------

Signed-off-by: Arshdeep54 <balarsh535@gmail.com>
Co-authored-by: Ning Sun <classicning@gmail.com>
Co-authored-by: zyy17 <zyylsxm@gmail.com>
2025-08-26 10:22:53 +00:00
ZonaHe
bb334e1594 feat: update dashboard to v0.11.1 (#6824)
Co-authored-by: ZonaHex <ZonaHex@users.noreply.github.com>
2025-08-26 08:24:05 +00:00
Weny Xu
ec8ff48473 fix: correct heartbeat stream handling logic (#6821)
* fix: correct heartbeat stream handling logic

Signed-off-by: WenyXu <wenymedia@gmail.com>

* Update src/meta-srv/src/service/heartbeat.rs

Co-authored-by: jeremyhi <jiachun_feng@proton.me>

---------

Signed-off-by: WenyXu <wenymedia@gmail.com>
Co-authored-by: jeremyhi <jiachun_feng@proton.me>
2025-08-26 07:39:59 +00:00
Lei, HUANG
d99734b97b perf: sparse encoder (#6809)
* perf/sparse-encoder:
 - **Update Dependencies**: Updated `criterion-plot` to version `0.5.0` and added `criterion` version `0.7.0` in `Cargo.lock`. Added `bytes` to `Cargo.toml` in `src/metric-engine`.
 - **Benchmarking**: Added a new benchmark for sparse encoding in `bench_sparse_encoding.rs` and updated `Cargo.toml` in `src/mito-codec` to include `criterion` as a dev-dependency.
 - **Sparse Encoding Enhancements**: Modified `SparsePrimaryKeyCodec` in `sparse.rs` to include new methods `encode_raw_tag_value` and `encode_internal`. Added public constants `RESERVED_COLUMN_ID_TSID` and `RESERVED_COLUMN_ID_TABLE_ID`.
 - **HTTP Server**: Made `try_decompress` function public in `prom_store.rs`.

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>

* perf/sparse-encoder:
 Improve buffer handling in `sparse.rs`

 - Refactored buffer reservation logic to use `value_len` for clarity.
 - Optimized chunk processing by calculating `num_chunks` and `remainder` for efficient data handling.
 - Enhanced manual serialization of bytes to avoid byte-by-byte operations, improving performance.

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>

* Update src/mito-codec/src/row_converter/sparse.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2025-08-26 04:10:11 +00:00
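A rough sketch of the chunked-copy idea from the perf commit above; the 8-byte group size and marker scheme are assumptions for illustration, not the exact GreptimeDB encoding:

```rust
// Hypothetical memcomparable-style serialization: reserve once, then copy
// whole groups with `extend_from_slice` instead of pushing byte by byte.
fn encode_in_chunks(value: &[u8], out: &mut Vec<u8>) {
    const GROUP: usize = 8;
    let num_chunks = value.len() / GROUP;
    let remainder = value.len() % GROUP;

    // One reservation based on the value length avoids repeated growth.
    out.reserve((num_chunks + 1) * (GROUP + 1));

    for chunk in value.chunks_exact(GROUP) {
        out.extend_from_slice(chunk); // bulk copy of a full group
        out.push(GROUP as u8 + 1);    // marker: more data follows
    }

    // Final (possibly partial) group, zero-padded, with its byte count.
    let mut last = [0u8; GROUP];
    last[..remainder].copy_from_slice(&value[num_chunks * GROUP..]);
    out.extend_from_slice(&last);
    out.push(remainder as u8);
}
```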
Ruihang Xia
eb5e627ddd fix: follow promql rule for handling label of aggr (#6788)
* fix: follow promql rule for handling label of aggr

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* adopt more rules

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-08-26 03:37:08 +00:00
Ning Sun
69eed2c3fa feat: only show prometheus logical tables for __name__ values query (#6814)
feat: only show prometheus logical tables for __name__ query

Signed-off-by: Ning Sun <sunning@greptime.com>
2025-08-25 15:04:42 +00:00
Ning Sun
48572d18a8 feat: name label regex matcher in label values api (#6799)
* test: add failing test for #6791

* test: add support for = and =~

* fix: lint

* fix: code merge issue

Signed-off-by: Ning Sun <sunning@greptime.com>

---------

Signed-off-by: Ning Sun <sunning@greptime.com>
2025-08-25 08:48:53 +00:00
Yingwen
d5575d3fa4 feat: add FlatConvertFormat to convert record batches in old format to the flat format (#6786)
* feat: add convert format to FlatReadFormat

Signed-off-by: evenyag <realevenyag@gmail.com>

* test: test convert format

Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: only convert string pks to dictionary

Signed-off-by: evenyag <realevenyag@gmail.com>

---------

Signed-off-by: evenyag <realevenyag@gmail.com>
2025-08-25 06:47:06 +00:00
Ning Sun
83a65a81c0 feat: add cli option for internal grpc (#6806) 2025-08-25 02:12:53 +00:00
432 changed files with 27558 additions and 9518 deletions

View File

@@ -12,7 +12,7 @@ runs:
steps:
- name: Install Etcd cluster
shell: bash
run: |
run: |
helm upgrade \
--install etcd oci://registry-1.docker.io/bitnamicharts/etcd \
--set replicaCount=${{ inputs.etcd-replicas }} \
@@ -24,4 +24,9 @@ runs:
--set auth.rbac.token.enabled=false \
--set persistence.size=2Gi \
--create-namespace \
--set global.security.allowInsecureImages=true \
--set image.registry=docker.io \
--set image.repository=greptime/etcd \
--set image.tag=3.6.1-debian-12-r3 \
--version 12.0.8 \
-n ${{ inputs.namespace }}

View File

@@ -1,3 +1,8 @@
logging:
level: "info"
format: "json"
filters:
- log_store=debug
meta:
configData: |-
[runtime]

View File

@@ -12,7 +12,7 @@ runs:
steps:
- name: Install Kafka cluster
shell: bash
run: |
run: |
helm upgrade \
--install kafka oci://registry-1.docker.io/bitnamicharts/kafka \
--set controller.replicaCount=${{ inputs.controller-replicas }} \
@@ -23,4 +23,8 @@ runs:
--set listeners.controller.protocol=PLAINTEXT \
--set listeners.client.protocol=PLAINTEXT \
--create-namespace \
--set image.registry=docker.io \
--set image.repository=greptime/kafka \
--set image.tag=3.9.0-debian-12-r1 \
--version 31.0.0 \
-n ${{ inputs.namespace }}

View File

@@ -6,9 +6,7 @@ inputs:
description: "Number of PostgreSQL replicas"
namespace:
default: "postgres-namespace"
postgres-version:
default: "14.2"
description: "PostgreSQL version"
description: "The PostgreSQL namespace"
storage-size:
default: "1Gi"
description: "Storage size for PostgreSQL"
@@ -22,7 +20,11 @@ runs:
helm upgrade \
--install postgresql oci://registry-1.docker.io/bitnamicharts/postgresql \
--set replicaCount=${{ inputs.postgres-replicas }} \
--set image.tag=${{ inputs.postgres-version }} \
--set global.security.allowInsecureImages=true \
--set image.registry=docker.io \
--set image.repository=greptime/postgresql \
--set image.tag=17.5.0-debian-12-r3 \
--version 16.7.4 \
--set persistence.size=${{ inputs.storage-size }} \
--set postgresql.username=greptimedb \
--set postgresql.password=admin \

View File

@@ -3,12 +3,14 @@
set -e
set -o pipefail
KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.24.0}"
KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.32.0}"
ENABLE_STANDALONE_MODE="${ENABLE_STANDALONE_MODE:-true}"
DEFAULT_INSTALL_NAMESPACE=${DEFAULT_INSTALL_NAMESPACE:-default}
GREPTIMEDB_IMAGE_TAG=${GREPTIMEDB_IMAGE_TAG:-latest}
ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
GREPTIME_CHART="https://greptimeteam.github.io/helm-charts/"
ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
ETCD_CHART_VERSION="${ETCD_CHART_VERSION:-12.0.8}"
ETCD_IMAGE_TAG="${ETCD_IMAGE_TAG:-3.6.1-debian-12-r3}"
# Create a cluster with 1 control-plane node and 5 workers.
function create_kind_cluster() {
@@ -35,10 +37,16 @@ function add_greptime_chart() {
function deploy_etcd_cluster() {
local namespace="$1"
helm install etcd "$ETCD_CHART" \
helm upgrade --install etcd "$ETCD_CHART" \
--version "$ETCD_CHART_VERSION" \
--create-namespace \
--set replicaCount=3 \
--set auth.rbac.create=false \
--set auth.rbac.token.enabled=false \
--set global.security.allowInsecureImages=true \
--set image.registry=docker.io \
--set image.repository=greptime/etcd \
--set image.tag="$ETCD_IMAGE_TAG" \
-n "$namespace"
# Wait for etcd cluster to be ready.
@@ -48,7 +56,8 @@ function deploy_etcd_cluster() {
# Deploy greptimedb-operator.
function deploy_greptimedb_operator() {
# Use the latest chart and image.
helm install greptimedb-operator greptime/greptimedb-operator \
helm upgrade --install greptimedb-operator greptime/greptimedb-operator \
--create-namespace \
--set image.tag=latest \
-n "$DEFAULT_INSTALL_NAMESPACE"
@@ -66,9 +75,11 @@ function deploy_greptimedb_cluster() {
deploy_etcd_cluster "$install_namespace"
helm install "$cluster_name" greptime/greptimedb-cluster \
helm upgrade --install "$cluster_name" greptime/greptimedb-cluster \
--create-namespace \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
--set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
--set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
-n "$install_namespace"
# Wait for greptimedb cluster to be ready.
@@ -101,15 +112,17 @@ function deploy_greptimedb_cluster_with_s3_storage() {
deploy_etcd_cluster "$install_namespace"
helm install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
helm upgrade --install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
--create-namespace \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
--set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
--set storage.s3.bucket="$AWS_CI_TEST_BUCKET" \
--set storage.s3.region="$AWS_REGION" \
--set storage.s3.root="$DATA_ROOT" \
--set storage.credentials.secretName=s3-credentials \
--set storage.credentials.accessKeyId="$AWS_ACCESS_KEY_ID" \
--set storage.credentials.secretAccessKey="$AWS_SECRET_ACCESS_KEY"
--set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
--set objectStorage.s3.bucket="$AWS_CI_TEST_BUCKET" \
--set objectStorage.s3.region="$AWS_REGION" \
--set objectStorage.s3.root="$DATA_ROOT" \
--set objectStorage.credentials.secretName=s3-credentials \
--set objectStorage.credentials.accessKeyId="$AWS_ACCESS_KEY_ID" \
--set objectStorage.credentials.secretAccessKey="$AWS_SECRET_ACCESS_KEY"
# Wait for greptimedb cluster to be ready.
while true; do
@@ -134,7 +147,8 @@ function deploy_greptimedb_cluster_with_s3_storage() {
# Deploy standalone greptimedb.
# It will expose cluster service ports as '34000', '34001', '34002', '34003' to local access.
function deploy_standalone_greptimedb() {
helm install greptimedb-standalone greptime/greptimedb-standalone \
helm upgrade --install greptimedb-standalone greptime/greptimedb-standalone \
--create-namespace \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
-n "$DEFAULT_INSTALL_NAMESPACE"

34
.github/scripts/pull-test-deps-images.sh vendored Executable file
View File

@@ -0,0 +1,34 @@
#!/bin/bash

# This script is used to pull the test dependency images that are stored in public ECR one by one to avoid rate limiting.

set -e

MAX_RETRIES=3

IMAGES=(
  "greptime/zookeeper:3.7"
  "greptime/kafka:3.9.0-debian-12-r1"
  "greptime/etcd:3.6.1-debian-12-r3"
  "greptime/minio:2024"
  "greptime/mysql:5.7"
)

for image in "${IMAGES[@]}"; do
  for ((attempt=1; attempt<=MAX_RETRIES; attempt++)); do
    if docker pull "$image"; then
      # Successfully pulled the image.
      break
    else
      # Use some simple exponential backoff to avoid rate limiting.
      if [ $attempt -lt $MAX_RETRIES ]; then
        sleep_seconds=$((attempt * 5))
        echo "Attempt $attempt failed for $image, waiting $sleep_seconds seconds"
        sleep $sleep_seconds # 5s, 10s delays
      else
        echo "Failed to pull $image after $MAX_RETRIES attempts"
        exit 1
      fi
    fi
  done
done

View File

@@ -618,10 +618,12 @@ jobs:
- uses: actions/checkout@v4
with:
persist-credentials: false
- if: matrix.mode.kafka
name: Setup kafka server
working-directory: tests-integration/fixtures
run: docker compose up -d --wait kafka
run: ../../.github/scripts/pull-test-deps-images.sh && docker compose up -d --wait kafka
- name: Download pre-built binaries
uses: actions/download-artifact@v4
with:
@@ -683,6 +685,30 @@ jobs:
- name: Run cargo clippy
run: make clippy
check-udeps:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Check Unused Dependencies
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: actions-rust-lang/setup-rust-toolchain@v1
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
shared-key: "check-udeps"
cache-all-crates: "true"
save-if: ${{ github.ref == 'refs/heads/main' }}
- name: Install cargo-udeps
run: cargo install cargo-udeps --locked
- name: Check unused dependencies
run: make check-udeps
conflict-check:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Check for conflict
@@ -698,7 +724,7 @@ jobs:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' && github.event_name != 'merge_group' }}
runs-on: ubuntu-22.04-arm
timeout-minutes: 60
needs: [conflict-check, clippy, fmt]
needs: [conflict-check, clippy, fmt, check-udeps]
steps:
- uses: actions/checkout@v4
with:
@@ -720,9 +746,11 @@ jobs:
save-if: ${{ github.ref == 'refs/heads/main' }}
- name: Install latest nextest release
uses: taiki-e/install-action@nextest
- name: Setup external services
working-directory: tests-integration/fixtures
run: docker compose up -d --wait
run: ../../.github/scripts/pull-test-deps-images.sh && docker compose up -d --wait
- name: Run nextest cases
run: cargo nextest run --workspace -F dashboard -F pg_kvbackend -F mysql_kvbackend
env:
@@ -739,8 +767,11 @@ jobs:
GT_MINIO_ACCESS_KEY: superpower_password
GT_MINIO_REGION: us-west-2
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
GT_ETCD_TLS_ENDPOINTS: https://127.0.0.1:2378
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
GT_POSTGRES15_ENDPOINTS: postgres://test_user:test_password@127.0.0.1:5433/postgres
GT_POSTGRES15_SCHEMA: test_schema
GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
@@ -773,9 +804,11 @@ jobs:
uses: taiki-e/install-action@nextest
- name: Install cargo-llvm-cov
uses: taiki-e/install-action@cargo-llvm-cov
- name: Setup external services
working-directory: tests-integration/fixtures
run: docker compose up -d --wait
run: ../../.github/scripts/pull-test-deps-images.sh && docker compose up -d --wait
- name: Run nextest cases
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend -F mysql_kvbackend
env:
@@ -791,8 +824,11 @@ jobs:
GT_MINIO_ACCESS_KEY: superpower_password
GT_MINIO_REGION: us-west-2
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
GT_ETCD_TLS_ENDPOINTS: https://127.0.0.1:2378
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
GT_POSTGRES15_ENDPOINTS: postgres://test_user:test_password@127.0.0.1:5433/postgres
GT_POSTGRES15_SCHEMA: test_schema
GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093

View File

@@ -67,6 +67,12 @@ jobs:
steps:
- run: 'echo "No action required"'
check-udeps:
name: Unused Dependencies
runs-on: ubuntu-latest
steps:
- run: 'echo "No action required"'
coverage:
runs-on: ubuntu-latest
steps:

View File

@@ -1,7 +1,7 @@
name: "Semantic Pull Request"
on:
pull_request:
pull_request_target:
types:
- opened
- reopened
@@ -12,9 +12,9 @@ concurrency:
cancel-in-progress: true
permissions:
issues: write
contents: write
contents: read
pull-requests: write
issues: write
jobs:
check:

3
.gitignore vendored
View File

@@ -52,6 +52,9 @@ venv/
tests-fuzz/artifacts/
tests-fuzz/corpus/
# cargo-udeps reports
udeps-report.json
# Nix
.direnv
.envrc

View File

@@ -57,13 +57,16 @@ GreptimeDB uses the [Apache 2.0 license](https://github.com/GreptimeTeam/greptim
- Make sure all your codes are formatted and follow the [coding style](https://pingcap.github.io/style-guide/rust/) and [style guide](docs/style-guide.md).
- Make sure all unit tests are passed using [nextest](https://nexte.st/index.html) `cargo nextest run --workspace --features pg_kvbackend,mysql_kvbackend` or `make test`.
- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings` or `make clippy`).
- Ensure there are no unused dependencies by running `make check-udeps` (clean them up with `make fix-udeps` if reported).
- If you must keep a target-specific dependency (e.g. under `[target.'cfg(...)'.dev-dependencies]`), add a cargo-udeps ignore entry in the same `Cargo.toml`, for example:
`[package.metadata.cargo-udeps.ignore]` with `development = ["rexpect"]` (or `dependencies`/`build` as appropriate).
- When modifying sample configuration files in `config/`, run `make config-docs` (which requires Docker to be installed) to update the configuration documentation and include it in your commit.
#### `pre-commit` Hooks
You could setup the [`pre-commit`](https://pre-commit.com/#plugins) hooks to run these checks on every commit automatically.
1. Install `pre-commit`
1. Install `pre-commit`
pip install pre-commit
@@ -71,7 +74,7 @@ You could setup the [`pre-commit`](https://pre-commit.com/#plugins) hooks to run
brew install pre-commit
2. Install the `pre-commit` hooks
2. Install the `pre-commit` hooks
$ pre-commit install
pre-commit installed at .git/hooks/pre-commit

287
Cargo.lock generated
View File

@@ -218,7 +218,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"
[[package]]
name = "api"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-base",
"common-decimal",
@@ -737,7 +737,7 @@ dependencies = [
[[package]]
name = "auth"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -1387,7 +1387,7 @@ dependencies = [
[[package]]
name = "cache"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"catalog",
"common-error",
@@ -1422,7 +1422,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "catalog"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arrow",
@@ -1763,7 +1763,7 @@ checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
[[package]]
name = "cli"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-stream",
"async-trait",
@@ -1807,7 +1807,7 @@ dependencies = [
"session",
"snafu 0.8.6",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tempfile",
"tokio",
@@ -1816,7 +1816,7 @@ dependencies = [
[[package]]
name = "client"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arc-swap",
@@ -1848,7 +1848,7 @@ dependencies = [
"serde_json",
"snafu 0.8.6",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"substrait 0.37.3",
"tokio",
"tokio-stream",
@@ -1889,7 +1889,7 @@ dependencies = [
[[package]]
name = "cmd"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"auth",
@@ -1951,7 +1951,7 @@ dependencies = [
"snafu 0.8.6",
"stat",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"temp-env",
"tempfile",
@@ -1997,7 +1997,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
[[package]]
name = "common-base"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"anymap2",
"async-trait",
@@ -2019,11 +2019,11 @@ dependencies = [
[[package]]
name = "common-catalog"
version = "0.17.0"
version = "0.17.1"
[[package]]
name = "common-config"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-base",
"common-error",
@@ -2049,7 +2049,7 @@ dependencies = [
[[package]]
name = "common-datasource"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"arrow",
"arrow-schema",
@@ -2084,7 +2084,7 @@ dependencies = [
[[package]]
name = "common-decimal"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"bigdecimal 0.4.8",
"common-error",
@@ -2097,7 +2097,7 @@ dependencies = [
[[package]]
name = "common-error"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-macro",
"http 1.3.1",
@@ -2108,7 +2108,7 @@ dependencies = [
[[package]]
name = "common-event-recorder"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -2130,7 +2130,7 @@ dependencies = [
[[package]]
name = "common-frontend"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -2152,7 +2152,7 @@ dependencies = [
[[package]]
name = "common-function"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"api",
@@ -2189,7 +2189,7 @@ dependencies = [
"hyperloglogplus",
"jsonb",
"memchr",
"nalgebra 0.33.2",
"nalgebra",
"num",
"num-traits",
"once_cell",
@@ -2201,7 +2201,6 @@ dependencies = [
"session",
"snafu 0.8.6",
"sql",
"statrs",
"store-api",
"table",
"tokio",
@@ -2211,7 +2210,7 @@ dependencies = [
[[package]]
name = "common-greptimedb-telemetry"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"common-runtime",
@@ -2228,7 +2227,7 @@ dependencies = [
[[package]]
name = "common-grpc"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arrow-flight",
@@ -2261,7 +2260,7 @@ dependencies = [
[[package]]
name = "common-grpc-expr"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"common-base",
@@ -2281,23 +2280,18 @@ dependencies = [
[[package]]
name = "common-macro"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"arc-swap",
"common-query",
"datatypes",
"greptime-proto",
"once_cell",
"proc-macro2",
"quote",
"snafu 0.8.6",
"static_assertions",
"syn 2.0.104",
]
[[package]]
name = "common-mem-prof"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"anyhow",
"common-error",
@@ -2313,7 +2307,7 @@ dependencies = [
[[package]]
name = "common-meta"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"anymap2",
"api",
@@ -2352,7 +2346,6 @@ dependencies = [
"greptime-proto",
"hex",
"humantime-serde",
"hyper 0.14.32",
"itertools 0.14.0",
"lazy_static",
"moka",
@@ -2386,7 +2379,7 @@ dependencies = [
[[package]]
name = "common-options"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-grpc",
"humantime-serde",
@@ -2395,11 +2388,11 @@ dependencies = [
[[package]]
name = "common-plugins"
version = "0.17.0"
version = "0.17.1"
[[package]]
name = "common-pprof"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-error",
"common-macro",
@@ -2411,7 +2404,7 @@ dependencies = [
[[package]]
name = "common-procedure"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-stream",
@@ -2440,7 +2433,7 @@ dependencies = [
[[package]]
name = "common-procedure-test"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"common-procedure",
@@ -2450,7 +2443,7 @@ dependencies = [
[[package]]
name = "common-query"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -2469,14 +2462,13 @@ dependencies = [
"snafu 0.8.6",
"sqlparser 0.55.0-greptime",
"sqlparser_derive 0.1.1",
"statrs",
"store-api",
"tokio",
]
[[package]]
name = "common-recordbatch"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"arc-swap",
"common-error",
@@ -2497,7 +2489,7 @@ dependencies = [
[[package]]
name = "common-runtime"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"clap 4.5.40",
@@ -2512,7 +2504,6 @@ dependencies = [
"paste",
"pin-project",
"prometheus",
"rand 0.9.1",
"ratelimit",
"serde",
"serde_json",
@@ -2527,7 +2518,7 @@ dependencies = [
[[package]]
name = "common-session"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"serde",
"strum 0.27.1",
@@ -2535,10 +2526,9 @@ dependencies = [
[[package]]
name = "common-sql"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-base",
"common-datasource",
"common-decimal",
"common-error",
"common-macro",
@@ -2554,7 +2544,7 @@ dependencies = [
[[package]]
name = "common-telemetry"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"backtrace",
"common-base",
@@ -2583,7 +2573,7 @@ dependencies = [
[[package]]
name = "common-test-util"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"client",
"common-grpc",
@@ -2596,7 +2586,7 @@ dependencies = [
[[package]]
name = "common-time"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"arrow",
"chrono",
@@ -2614,7 +2604,7 @@ dependencies = [
[[package]]
name = "common-version"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"build-data",
"cargo-manifest",
@@ -2625,7 +2615,7 @@ dependencies = [
[[package]]
name = "common-wal"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"common-base",
"common-error",
@@ -2648,9 +2638,8 @@ dependencies = [
[[package]]
name = "common-workload"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"common-telemetry",
"serde",
]
@@ -2912,7 +2901,7 @@ dependencies = [
"cast",
"ciborium",
"clap 3.2.25",
"criterion-plot",
"criterion-plot 0.5.0",
"futures",
"itertools 0.10.5",
"lazy_static",
@@ -2939,7 +2928,7 @@ dependencies = [
"cast",
"ciborium",
"clap 4.5.40",
"criterion-plot",
"criterion-plot 0.5.0",
"is-terminal",
"itertools 0.10.5",
"num-traits",
@@ -2955,6 +2944,29 @@ dependencies = [
"walkdir",
]
[[package]]
name = "criterion"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928"
dependencies = [
"anes",
"cast",
"ciborium",
"clap 4.5.40",
"criterion-plot 0.6.0",
"itertools 0.13.0",
"num-traits",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.5.0"
@@ -2965,6 +2977,16 @@ dependencies = [
"itertools 0.10.5",
]
[[package]]
name = "criterion-plot"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338"
dependencies = [
"cast",
"itertools 0.13.0",
]
[[package]]
name = "crossbeam"
version = "0.8.4"
@@ -3843,7 +3865,7 @@ dependencies = [
[[package]]
name = "datanode"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arrow-flight",
@@ -3896,7 +3918,7 @@ dependencies = [
"session",
"snafu 0.8.6",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tokio",
"toml 0.8.23",
@@ -3906,7 +3928,7 @@ dependencies = [
[[package]]
name = "datatypes"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"arrow",
"arrow-array",
@@ -4580,7 +4602,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "file-engine"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -4712,7 +4734,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
[[package]]
name = "flow"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arrow",
@@ -4779,7 +4801,7 @@ dependencies = [
"sql",
"store-api",
"strum 0.27.1",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tokio",
"tonic 0.13.1",
@@ -4834,7 +4856,7 @@ checksum = "28dd6caf6059519a65843af8fe2a3ae298b14b80179855aeb4adc2c1934ee619"
[[package]]
name = "frontend"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arc-swap",
@@ -4897,7 +4919,7 @@ dependencies = [
"sqlparser 0.55.0-greptime",
"store-api",
"strfmt",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tokio",
"tokio-util",
@@ -5277,7 +5299,7 @@ dependencies = [
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=df2bb74b5990c159dfd5b7a344eecf8f4307af64#df2bb74b5990c159dfd5b7a344eecf8f4307af64"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=66eb089afa6baaa3ddfafabd0a4abbe317d012c3#66eb089afa6baaa3ddfafabd0a4abbe317d012c3"
dependencies = [
"prost 0.13.5",
"prost-types 0.13.5",
@@ -6039,7 +6061,7 @@ dependencies = [
[[package]]
name = "index"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"asynchronous-codec",
@@ -6979,7 +7001,7 @@ checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "log-query"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"chrono",
"common-error",
@@ -6991,7 +7013,7 @@ dependencies = [
[[package]]
name = "log-store"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-stream",
"async-trait",
@@ -7021,7 +7043,6 @@ dependencies = [
"protobuf-build",
"raft-engine",
"rand 0.9.1",
"rand_distr",
"rskafka",
"serde",
"serde_json",
@@ -7263,8 +7284,7 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
name = "memcomparable"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "376101dbd964fc502d5902216e180f92b3d003b5cc3d2e40e044eb5470fca677"
source = "git+https://github.com/v0y4g3r/memcomparable.git?rev=a07122dc03556bbd88ad66234cbea7efd3b23efb#a07122dc03556bbd88ad66234cbea7efd3b23efb"
dependencies = [
"bytes",
"serde",
@@ -7300,7 +7320,7 @@ dependencies = [
[[package]]
name = "meta-client"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -7328,7 +7348,7 @@ dependencies = [
[[package]]
name = "meta-srv"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -7369,12 +7389,10 @@ dependencies = [
"http-body-util",
"humantime",
"humantime-serde",
"hyper 0.14.32",
"hyper-util",
"itertools 0.14.0",
"lazy_static",
"local-ip-address",
"log-store",
"once_cell",
"parking_lot 0.12.4",
"prometheus",
@@ -7426,13 +7444,14 @@ dependencies = [
[[package]]
name = "metric-engine"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"aquamarine",
"async-stream",
"async-trait",
"base64 0.22.1",
"bytes",
"common-base",
"common-error",
"common-macro",
@@ -7518,7 +7537,7 @@ dependencies = [
[[package]]
name = "mito-codec"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"bytes",
@@ -7529,6 +7548,7 @@ dependencies = [
"common-recordbatch",
"common-telemetry",
"common-time",
"criterion 0.7.0",
"datafusion-common",
"datafusion-expr",
"datatypes",
@@ -7541,7 +7561,7 @@ dependencies = [
[[package]]
name = "mito2"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"aquamarine",
@@ -7550,6 +7570,7 @@ dependencies = [
"async-trait",
"bytemuck",
"bytes",
"chrono",
"common-base",
"common-config",
"common-datasource",
@@ -7581,7 +7602,6 @@ dependencies = [
"itertools 0.14.0",
"lazy_static",
"log-store",
"memcomparable",
"mito-codec",
"moka",
"object-store",
@@ -7867,24 +7887,6 @@ dependencies = [
"zstd 0.13.3",
]
[[package]]
name = "nalgebra"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d506eb7e08d6329505faa8a3a00a5dcc6de9f76e0c77e4b75763ae3c770831ff"
dependencies = [
"approx 0.5.1",
"matrixmultiply",
"nalgebra-macros 0.1.0",
"num-complex",
"num-rational",
"num-traits",
"rand 0.8.5",
"rand_distr",
"simba 0.6.0",
"typenum",
]
[[package]]
name = "nalgebra"
version = "0.33.2"
@@ -7893,25 +7895,14 @@ checksum = "26aecdf64b707efd1310e3544d709c5c0ac61c13756046aaaba41be5c4f66a3b"
dependencies = [
"approx 0.5.1",
"matrixmultiply",
"nalgebra-macros 0.2.2",
"nalgebra-macros",
"num-complex",
"num-rational",
"num-traits",
"simba 0.9.0",
"simba",
"typenum",
]
[[package]]
name = "nalgebra-macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "nalgebra-macros"
version = "0.2.2"
@@ -8304,7 +8295,7 @@ dependencies = [
[[package]]
name = "object-store"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"anyhow",
"bytes",
@@ -8589,7 +8580,7 @@ dependencies = [
[[package]]
name = "operator"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"api",
@@ -8647,7 +8638,7 @@ dependencies = [
"sql",
"sqlparser 0.55.0-greptime",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tokio",
"tokio-util",
@@ -8959,7 +8950,7 @@ dependencies = [
[[package]]
name = "partition"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -9298,7 +9289,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pipeline"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"api",
@@ -9347,7 +9338,6 @@ dependencies = [
"serde",
"serde_json",
"session",
"simd-json",
"snafu 0.8.6",
"sql",
"table",
@@ -9455,7 +9445,7 @@ dependencies = [
[[package]]
name = "plugins"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"auth",
"clap 4.5.40",
@@ -9753,7 +9743,7 @@ dependencies = [
[[package]]
name = "promql"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"async-trait",
@@ -10036,7 +10026,7 @@ dependencies = [
[[package]]
name = "puffin"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-compression 0.4.19",
"async-trait",
@@ -10078,7 +10068,7 @@ dependencies = [
[[package]]
name = "query"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"api",
@@ -10122,7 +10112,7 @@ dependencies = [
"log-query",
"meter-core",
"meter-macros",
"nalgebra 0.33.2",
"nalgebra",
"num",
"num-traits",
"object-store",
@@ -10142,9 +10132,8 @@ dependencies = [
"snafu 0.8.6",
"sql",
"sqlparser 0.55.0-greptime",
"statrs",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tokio",
"tokio-stream",
@@ -10787,7 +10776,7 @@ dependencies = [
[[package]]
name = "rskafka"
version = "0.6.0"
source = "git+https://github.com/influxdata/rskafka.git?rev=a62120b6c74d68953464b256f858dc1c41a903b4#a62120b6c74d68953464b256f858dc1c41a903b4"
source = "git+https://github.com/WenyXu/rskafka.git?rev=7b0f31ed39db049b4ee2e5f1e95b5a30be9baf76#7b0f31ed39db049b4ee2e5f1e95b5a30be9baf76"
dependencies = [
"bytes",
"chrono",
@@ -11508,7 +11497,7 @@ dependencies = [
[[package]]
name = "servers"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"api",
@@ -11528,7 +11517,6 @@ dependencies = [
"client",
"common-base",
"common-catalog",
"common-config",
"common-error",
"common-frontend",
"common-grpc",
@@ -11632,7 +11620,7 @@ dependencies = [
[[package]]
name = "session"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"ahash 0.8.12",
"api",
@@ -11645,6 +11633,7 @@ dependencies = [
"common-session",
"common-telemetry",
"common-time",
"datafusion-common",
"derive_builder 0.20.2",
"derive_more",
"snafu 0.8.6",
@@ -11740,19 +11729,6 @@ dependencies = [
"rand_core 0.6.4",
]
[[package]]
name = "simba"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0b7840f121a46d63066ee7a99fc81dcabbc6105e437cae43528cea199b5a05f"
dependencies = [
"approx 0.5.1",
"num-complex",
"num-traits",
"paste",
"wide",
]
[[package]]
name = "simba"
version = "0.9.0"
@@ -11972,7 +11948,7 @@ dependencies = [
[[package]]
name = "sql"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arrow-buffer",
@@ -12030,7 +12006,7 @@ dependencies = [
[[package]]
name = "sqlness-runner"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"clap 4.5.40",
@@ -12330,7 +12306,7 @@ dependencies = [
[[package]]
name = "stat"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"nix 0.30.1",
]
@@ -12341,22 +12317,9 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "statrs"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b35a062dbadac17a42e0fc64c27f419b25d6fae98572eb43c8814c9e873d7721"
dependencies = [
"approx 0.5.1",
"lazy_static",
"nalgebra 0.29.0",
"num-traits",
"rand 0.8.5",
]
[[package]]
name = "store-api"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"aquamarine",
@@ -12371,6 +12334,7 @@ dependencies = [
"common-sql",
"common-time",
"common-wal",
"datafusion-common",
"datafusion-expr",
"datafusion-physical-plan",
"datatypes",
@@ -12502,7 +12466,7 @@ dependencies = [
[[package]]
name = "substrait"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"async-trait",
"bytes",
@@ -12670,7 +12634,7 @@ dependencies = [
[[package]]
name = "table"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"async-trait",
@@ -12939,7 +12903,7 @@ checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
[[package]]
name = "tests-fuzz"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"arbitrary",
"async-trait",
@@ -12983,7 +12947,7 @@ dependencies = [
[[package]]
name = "tests-integration"
version = "0.17.0"
version = "0.17.1"
dependencies = [
"api",
"arrow-flight",
@@ -13055,7 +13019,7 @@ dependencies = [
"sql",
"sqlx",
"store-api",
"substrait 0.17.0",
"substrait 0.17.1",
"table",
"tempfile",
"time",
@@ -13523,6 +13487,7 @@ dependencies = [
"percent-encoding",
"pin-project",
"prost 0.13.5",
"rustls-native-certs 0.8.1",
"socket2 0.5.10",
"tokio",
"tokio-rustls",


@@ -73,7 +73,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.17.0"
version = "0.17.1"
edition = "2021"
license = "Apache-2.0"
@@ -138,11 +138,14 @@ deadpool-postgres = "0.14"
derive_builder = "0.20"
dotenv = "0.15"
either = "1.15"
etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62df834f0cffda355eba96691fe1a9a332b75a7" }
etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62df834f0cffda355eba96691fe1a9a332b75a7", features = [
"tls",
"tls-roots",
] }
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "df2bb74b5990c159dfd5b7a344eecf8f4307af64" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "66eb089afa6baaa3ddfafabd0a4abbe317d012c3" }
hex = "0.4"
http = "1"
humantime = "2.1"
@@ -191,7 +194,7 @@ reqwest = { version = "0.12", default-features = false, features = [
"stream",
"multipart",
] }
rskafka = { git = "https://github.com/influxdata/rskafka.git", rev = "a62120b6c74d68953464b256f858dc1c41a903b4", features = [
rskafka = { git = "https://github.com/WenyXu/rskafka.git", rev = "7b0f31ed39db049b4ee2e5f1e95b5a30be9baf76", features = [
"transport-tls",
] }
rstest = "0.25"


@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
IMAGE_REGISTRY ?= docker.io
IMAGE_NAMESPACE ?= greptime
IMAGE_TAG ?= latest
DEV_BUILDER_IMAGE_TAG ?= 2025-05-19-32619816-20250818043248
DEV_BUILDER_IMAGE_TAG ?= 2025-05-19-f55023f3-20250829091211
BUILDX_MULTI_PLATFORM_BUILD ?= false
BUILDX_BUILDER_NAME ?= gtbuilder
BASE_IMAGE ?= ubuntu
@@ -193,6 +193,17 @@ clippy: ## Check clippy rules.
fix-clippy: ## Fix clippy violations.
	cargo clippy --workspace --all-targets --all-features --fix

.PHONY: check-udeps
check-udeps: ## Check unused dependencies.
	cargo udeps --workspace --all-targets

.PHONY: fix-udeps
fix-udeps: ## Remove unused dependencies automatically.
	@echo "Running cargo-udeps to find unused dependencies..."
	@cargo udeps --workspace --all-targets --output json > udeps-report.json || true
	@echo "Removing unused dependencies..."
	@python3 scripts/fix-udeps.py udeps-report.json

.PHONY: fmt-check
fmt-check: ## Check code format.
	cargo fmt --all -- --check


@@ -148,7 +148,7 @@
| `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
| `region_engine.mito.max_concurrent_scan_files` | Integer | `128` | Maximum number of SST files to scan concurrently. |
| `region_engine.mito.max_concurrent_scan_files` | Integer | `384` | Maximum number of SST files to scan concurrently. |
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
@@ -245,6 +245,16 @@
| `grpc.tls.cert_path` | String | Unset | Certificate file path. |
| `grpc.tls.key_path` | String | Unset | Private key file path. |
| `grpc.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload.<br/>For now, gRPC tls config does not support auto reload. |
| `internal_grpc` | -- | -- | The internal gRPC server options. Internal gRPC port for nodes inside cluster to access frontend. |
| `internal_grpc.bind_addr` | String | `127.0.0.1:4010` | The address to bind the gRPC server. |
| `internal_grpc.server_addr` | String | `127.0.0.1:4010` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
| `internal_grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `internal_grpc.flight_compression` | String | `arrow_ipc` | Compression mode for frontend side Arrow IPC service. Available options:<br/>- `none`: disable all compression<br/>- `transport`: only enable gRPC transport compression (zstd)<br/>- `arrow_ipc`: only enable Arrow IPC compression (lz4)<br/>- `all`: enable all compression.<br/>Default to `none` |
| `internal_grpc.tls` | -- | -- | internal gRPC server TLS options, see `mysql.tls` section. |
| `internal_grpc.tls.mode` | String | `disable` | TLS mode. |
| `internal_grpc.tls.cert_path` | String | Unset | Certificate file path. |
| `internal_grpc.tls.key_path` | String | Unset | Private key file path. |
| `internal_grpc.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload.<br/>For now, gRPC tls config does not support auto reload. |
| `mysql` | -- | -- | MySQL server options. |
| `mysql.enable` | Bool | `true` | Whether to enable. |
| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
@@ -333,6 +343,7 @@
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
| `backend` | String | `etcd_store` | The datastore for meta server.<br/>Available values:<br/>- `etcd_store` (default value)<br/>- `memory_store`<br/>- `postgres_store`<br/>- `mysql_store` |
| `meta_table_name` | String | `greptime_metakv` | Table name in RDS to store metadata. Effect when using a RDS kvbackend.<br/>**Only used when backend is `postgres_store`.** |
| `meta_schema_name` | String | `greptime_schema` | Optional PostgreSQL schema for metadata table and election table name qualification.<br/>When PostgreSQL public schema is not writable (e.g., PostgreSQL 15+ with restricted public),<br/>set this to a writable schema. GreptimeDB will use `meta_schema_name`.`meta_table_name`.<br/>GreptimeDB will NOT create the schema automatically; please ensure it exists or the user has permission.<br/>**Only used when backend is `postgres_store`.** |
| `meta_election_lock_id` | Integer | `1` | Advisory lock id in PostgreSQL for election. Effect when using PostgreSQL as kvbackend<br/>Only used when backend is `postgres_store`. |
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
@@ -344,7 +355,7 @@
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
| `backend_tls` | -- | -- | TLS configuration for kv store backend (only applicable for PostgreSQL/MySQL backends)<br/>When using PostgreSQL or MySQL as metadata store, you can configure TLS here |
| `backend_tls` | -- | -- | TLS configuration for kv store backend (applicable for etcd, PostgreSQL, and MySQL backends)<br/>When using etcd, PostgreSQL, or MySQL as metadata store, you can configure TLS here |
| `backend_tls.mode` | String | `prefer` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- "disable" - No TLS<br/>- "prefer" (default) - Try TLS, fallback to plain<br/>- "require" - Require TLS<br/>- "verify_ca" - Require TLS and verify CA<br/>- "verify_full" - Require TLS and verify hostname |
| `backend_tls.cert_path` | String | `""` | Path to client certificate file (for client authentication)<br/>Like "/path/to/client.crt" |
| `backend_tls.key_path` | String | `""` | Path to client private key file (for client authentication)<br/>Like "/path/to/client.key" |
@@ -379,8 +390,9 @@
| `wal.provider` | String | `raft_engine` | -- |
| `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster.<br/><br/>**It's only used when the provider is `kafka`**. |
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)`<br/>**It's only used when the provider is `kafka`**. |
| `wal.auto_prune_interval` | String | `10m` | Interval of automatically WAL pruning.<br/>Set to `0s` to disable automatically WAL pruning which delete unused remote WAL entries periodically.<br/>**It's only used when the provider is `kafka`**. |
| `wal.auto_prune_interval` | String | `30m` | Interval of automatically WAL pruning.<br/>Set to `0s` to disable automatically WAL pruning which delete unused remote WAL entries periodically.<br/>**It's only used when the provider is `kafka`**. |
| `wal.flush_trigger_size` | String | `512MB` | Estimated size threshold to trigger a flush when using Kafka remote WAL.<br/>Since multiple regions may share a Kafka topic, the estimated size is calculated as:<br/> (latest_entry_id - flushed_entry_id) * avg_record_size<br/>MetaSrv triggers a flush for a region when this estimated size exceeds `flush_trigger_size`.<br/>- `latest_entry_id`: The latest entry ID in the topic.<br/>- `flushed_entry_id`: The last flushed entry ID for the region.<br/>Set to "0" to let the system decide the flush trigger size.<br/>**It's only used when the provider is `kafka`**. |
| `wal.checkpoint_trigger_size` | String | `128MB` | Estimated size threshold to trigger a checkpoint when using Kafka remote WAL.<br/>The estimated size is calculated as:<br/> (latest_entry_id - last_checkpoint_entry_id) * avg_record_size<br/>MetaSrv triggers a checkpoint for a region when this estimated size exceeds `checkpoint_trigger_size`.<br/>Set to "0" to let the system decide the checkpoint trigger size.<br/>**It's only used when the provider is `kafka`**. |
| `wal.auto_prune_parallelism` | Integer | `10` | Concurrent task limit for automatically WAL pruning.<br/>**It's only used when the provider is `kafka`**. |
| `wal.num_topics` | Integer | `64` | Number of topics used for remote WAL.<br/>**It's only used when the provider is `kafka`**. |
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default)<br/>**It's only used when the provider is `kafka`**. |
@@ -390,8 +402,8 @@
| `event_recorder` | -- | -- | Configuration options for the event recorder. |
| `event_recorder.ttl` | String | `90d` | TTL for the events table that will be used to store the events. Default is `90d`. |
| `stats_persistence` | -- | -- | Configuration options for the stats persistence. |
| `stats_persistence.ttl` | String | `30d` | TTL for the stats table that will be used to store the stats. Default is `30d`.<br/>Set to `0s` to disable stats persistence. |
| `stats_persistence.interval` | String | `60s` | The interval to persist the stats. Default is `60s`.<br/>The minimum value is `60s`, if the value is less than `60s`, it will be overridden to `60s`. |
| `stats_persistence.ttl` | String | `0s` | TTL for the stats table that will be used to store the stats.<br/>Set to `0s` to disable stats persistence.<br/>Default is `0s`.<br/>If you want to enable stats persistence, set the TTL to a value greater than 0.<br/>It is recommended to set a small value, e.g., `3h`. |
| `stats_persistence.interval` | String | `10m` | The interval to persist the stats. Default is `10m`.<br/>The minimum value is `10m`, if the value is less than `10m`, it will be overridden to `10m`. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
@@ -509,6 +521,8 @@
| `region_engine.mito.worker_channel_size` | Integer | `128` | Request channel size of each worker. |
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
| `region_engine.mito.experimental_manifest_keep_removed_file_count` | Integer | `256` | Number of removed files to keep in manifest's `removed_files` field before also<br/>remove them from `removed_files`. Mostly for debugging purpose.<br/>If set to 0, it will only use `keep_removed_file_ttl` to decide when to remove files<br/>from `removed_files` field. |
| `region_engine.mito.experimental_manifest_keep_removed_file_ttl` | String | `1h` | How long to keep removed files in the `removed_files` field of manifest<br/>after they are removed from manifest.<br/>files will only be removed from `removed_files` field<br/>if both `keep_removed_file_count` and `keep_removed_file_ttl` is reached. |
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
| `region_engine.mito.max_background_flushes` | Integer | Auto | Max number of running background flush jobs (default: 1/2 of cpu cores). |
| `region_engine.mito.max_background_compactions` | Integer | Auto | Max number of running background compaction jobs (default: 1/4 of cpu cores). |
@@ -526,7 +540,7 @@
| `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
| `region_engine.mito.max_concurrent_scan_files` | Integer | `128` | Maximum number of SST files to scan concurrently. |
| `region_engine.mito.max_concurrent_scan_files` | Integer | `384` | Maximum number of SST files to scan concurrently. |
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |


@@ -409,6 +409,19 @@ worker_request_batch_size = 64
## Number of meta action updated to trigger a new checkpoint for the manifest.
manifest_checkpoint_distance = 10
## Number of removed files to keep in manifest's `removed_files` field before also
## remove them from `removed_files`. Mostly for debugging purpose.
## If set to 0, it will only use `keep_removed_file_ttl` to decide when to remove files
## from `removed_files` field.
experimental_manifest_keep_removed_file_count = 256
## How long to keep removed files in the `removed_files` field of manifest
## after they are removed from manifest.
## files will only be removed from `removed_files` field
## if both `keep_removed_file_count` and `keep_removed_file_ttl` is reached.
experimental_manifest_keep_removed_file_ttl = "1h"
## Whether to compress manifest and checkpoint file by gzip (default false).
compress_manifest = false
@@ -475,7 +488,7 @@ sst_write_buffer_size = "8MB"
parallel_scan_channel_size = 32
## Maximum number of SST files to scan concurrently.
max_concurrent_scan_files = 128
max_concurrent_scan_files = 384
## Whether to allow stale WAL entries read during replay.
allow_stale_entries = false


@@ -79,6 +79,42 @@ key_path = ""
## For now, gRPC tls config does not support auto reload.
watch = false
## The internal gRPC server options. Internal gRPC port for nodes inside cluster to access frontend.
[internal_grpc]
## The address to bind the gRPC server.
bind_addr = "127.0.0.1:4010"
## The address advertised to the metasrv, and used for connections from outside the host.
## If left empty or unset, the server will automatically use the IP address of the first network interface
## on the host, with the same port number as the one specified in `grpc.bind_addr`.
server_addr = "127.0.0.1:4010"
## The number of server worker threads.
runtime_size = 8
## Compression mode for frontend side Arrow IPC service. Available options:
## - `none`: disable all compression
## - `transport`: only enable gRPC transport compression (zstd)
## - `arrow_ipc`: only enable Arrow IPC compression (lz4)
## - `all`: enable all compression.
## Default to `none`
flight_compression = "arrow_ipc"
## internal gRPC server TLS options, see `mysql.tls` section.
[internal_grpc.tls]
## TLS mode.
mode = "disable"
## Certificate file path.
## @toml2docs:none-default
cert_path = ""
## Private key file path.
## @toml2docs:none-default
key_path = ""
## Watch for Certificate and key file change and auto reload.
## For now, gRPC tls config does not support auto reload.
watch = false
## MySQL server options.
[mysql]
## Whether to enable.


@@ -23,6 +23,14 @@ backend = "etcd_store"
## **Only used when backend is `postgres_store`.**
meta_table_name = "greptime_metakv"
## Optional PostgreSQL schema for metadata table and election table name qualification.
## When PostgreSQL public schema is not writable (e.g., PostgreSQL 15+ with restricted public),
## set this to a writable schema. GreptimeDB will use `meta_schema_name`.`meta_table_name`.
## GreptimeDB will NOT create the schema automatically; please ensure it exists or the user has permission.
## **Only used when backend is `postgres_store`.**
meta_schema_name = "greptime_schema"
## Advisory lock id in PostgreSQL for election. Effect when using PostgreSQL as kvbackend
## Only used when backend is `postgres_store`.
meta_election_lock_id = 1
@@ -65,8 +73,8 @@ node_max_idle_time = "24hours"
## The number of threads to execute the runtime for global write operations.
#+ compact_rt_size = 4
## TLS configuration for kv store backend (only applicable for PostgreSQL/MySQL backends)
## When using PostgreSQL or MySQL as metadata store, you can configure TLS here
## TLS configuration for kv store backend (applicable for etcd, PostgreSQL, and MySQL backends)
## When using etcd, PostgreSQL, or MySQL as metadata store, you can configure TLS here
[backend_tls]
## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
## - "disable" - No TLS
@@ -190,7 +198,7 @@ auto_create_topics = true
## Interval of automatically WAL pruning.
## Set to `0s` to disable automatically WAL pruning which delete unused remote WAL entries periodically.
## **It's only used when the provider is `kafka`**.
auto_prune_interval = "10m"
auto_prune_interval = "30m"
## Estimated size threshold to trigger a flush when using Kafka remote WAL.
@@ -203,6 +211,14 @@ auto_prune_interval = "10m"
## **It's only used when the provider is `kafka`**.
flush_trigger_size = "512MB"
## Estimated size threshold to trigger a checkpoint when using Kafka remote WAL.
## The estimated size is calculated as:
## (latest_entry_id - last_checkpoint_entry_id) * avg_record_size
## MetaSrv triggers a checkpoint for a region when this estimated size exceeds `checkpoint_trigger_size`.
## Set to "0" to let the system decide the checkpoint trigger size.
## **It's only used when the provider is `kafka`**.
checkpoint_trigger_size = "128MB"
## Concurrent task limit for automatically WAL pruning.
## **It's only used when the provider is `kafka`**.
auto_prune_parallelism = 10
@@ -258,12 +274,15 @@ ttl = "90d"
## Configuration options for the stats persistence.
[stats_persistence]
## TTL for the stats table that will be used to store the stats. Default is `30d`.
## TTL for the stats table that will be used to store the stats.
## Set to `0s` to disable stats persistence.
ttl = "30d"
## The interval to persist the stats. Default is `60s`.
## The minimum value is `60s`, if the value is less than `60s`, it will be overridden to `60s`.
interval = "60s"
## Default is `0s`.
## If you want to enable stats persistence, set the TTL to a value greater than 0.
## It is recommended to set a small value, e.g., `3h`.
ttl = "0s"
## The interval to persist the stats. Default is `10m`.
## The minimum value is `10m`, if the value is less than `10m`, it will be overridden to `10m`.
interval = "10m"
## The logging options.
[logging]


@@ -567,7 +567,7 @@ sst_write_buffer_size = "8MB"
parallel_scan_channel_size = 32
## Maximum number of SST files to scan concurrently.
max_concurrent_scan_files = 128
max_concurrent_scan_files = 384
## Whether to allow stale WAL entries read during replay.
allow_stale_entries = false


@@ -13,7 +13,8 @@ RUN apt-get update && apt-get install -y \
git \
unzip \
build-essential \
pkg-config
pkg-config \
openssh-client
# Install protoc
ARG PROTOBUF_VERSION=29.3


@@ -34,6 +34,48 @@ services:
    networks:
      - greptimedb

  etcd-tls:
    <<: *etcd_common_settings
    container_name: etcd-tls
    ports:
      - 2378:2378
      - 2381:2381
    command:
      - --name=etcd-tls
      - --data-dir=/var/lib/etcd
      - --initial-advertise-peer-urls=https://etcd-tls:2381
      - --listen-peer-urls=https://0.0.0.0:2381
      - --listen-client-urls=https://0.0.0.0:2378
      - --advertise-client-urls=https://etcd-tls:2378
      - --heartbeat-interval=250
      - --election-timeout=1250
      - --initial-cluster=etcd-tls=https://etcd-tls:2381
      - --initial-cluster-state=new
      - --initial-cluster-token=etcd-tls-cluster
      - --cert-file=/certs/server.crt
      - --key-file=/certs/server-key.pem
      - --peer-cert-file=/certs/server.crt
      - --peer-key-file=/certs/server-key.pem
      - --trusted-ca-file=/certs/ca.crt
      - --peer-trusted-ca-file=/certs/ca.crt
      - --client-cert-auth
      - --peer-client-cert-auth
    volumes:
      - ./greptimedb-cluster-docker-compose/etcd-tls:/var/lib/etcd
      - ./greptimedb-cluster-docker-compose/certs:/certs:ro
    environment:
      - ETCDCTL_API=3
      - ETCDCTL_CACERT=/certs/ca.crt
      - ETCDCTL_CERT=/certs/server.crt
      - ETCDCTL_KEY=/certs/server-key.pem
    healthcheck:
      test: [ "CMD", "etcdctl", "--endpoints=https://etcd-tls:2378", "--cacert=/certs/ca.crt", "--cert=/certs/server.crt", "--key=/certs/server-key.pem", "endpoint", "health" ]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - greptimedb

  metasrv:
    image: *greptimedb_image
    container_name: metasrv


@@ -0,0 +1,112 @@
---
Feature Name: Async Index Build
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/6756
Date: 2025-08-16
Author: "SNC123 <sinhco@outlook.com>"
---
# Summary
This RFC proposes an asynchronous index build mechanism in the database, with a configuration option to choose between synchronous and asynchronous modes, aiming to improve flexibility and adapt to different workload requirements.
# Motivation
Currently, index creation is performed synchronously, which may lead to prolonged write suspension and impact business continuity. As data volume grows, the time required for index building increases significantly. An asynchronous solution is urgently needed to enhance user experience and system throughput.
# Details
## Overview
The following table highlights the difference between async and sync index approach:
| Approach | Trigger | Data Source | Additional Index Metadata Installation | Fine-grained `FileMeta` Index |
| :--- | :--- | :--- | :--- | :--- |
| Sync Index | On `write_sst` | Memory (on flush) / Disk (on compact) | Not required (already installed synchronously) | Not required |
| Async Index | 4 trigger types | Disk | Required | Required |
The index build mode (synchronous or asynchronous) can be selected via configuration file.
### Four Trigger Types
This RFC introduces four `IndexBuildType`s to trigger index building:
- **Manual Rebuild**: Triggered by the user via `ADMIN build_index("table_name")`, for scenarios like recovering from failed builds or migrating data. SST files whose `ColumnIndexMetadata` (see below) is already consistent with the `RegionMetadata` will be skipped.
- **Schema Change**: Automatically triggered when the schema of an indexed column is altered.
- **Flush**: Automatically builds indexes for new SST files created by a flush.
- **Compact**: Automatically builds indexes for new SST files created by a compaction.
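
The four triggers above can be pictured as a plain enum. The sketch below is illustrative only; the exact variant names and payloads in the implementation may differ.

```rust
/// Illustrative sketch of the trigger kinds described above; variant names
/// are assumptions, not the exact identifiers used in the codebase.
enum IndexBuildType {
    /// Requested by the user via `ADMIN build_index("table_name")`.
    ManualRebuild,
    /// The schema of an indexed column was altered.
    SchemaChange,
    /// New SST files were produced by a flush.
    Flush,
    /// New SST files were produced by a compaction.
    Compact,
}
```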
### Additional Index Metadata Installation
Previously, index information in the in-memory `FileMeta` was updated synchronously. The async approach requires an explicit installation step.
A race condition can occur when compaction and index building run concurrently, leading to:
1. Building an index for a file that is about to be deleted by compaction.
2. Creating an unnecessary index file and an incorrect manifest record.
3. On restart, replaying the manifest could load metadata for a non-existent file.
To prevent this, the system checks if a file's `FileMeta` is in a `compacting` state before updating the manifest. If it is, the installation is aborted.
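
A minimal sketch of that guard is shown below, using illustrative stand-in types; the real check lives in the region worker and operates on the engine's own `FileMeta` and version structures.

```rust
use std::collections::HashMap;

// Illustrative stand-ins for the engine's real types.
type FileId = String;

struct FileMeta {
    /// Set while a compaction has scheduled this file for removal.
    compacting: bool,
}

/// Returns true only when the file is still part of the current region
/// version and is not being compacted, i.e. it is safe to record the new
/// index in the manifest. Otherwise the installation is skipped.
fn should_install_index(current_version: &HashMap<FileId, FileMeta>, file_id: &FileId) -> bool {
    current_version
        .get(file_id)
        .map(|meta| !meta.compacting)
        .unwrap_or(false)
}
```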
### Fine-grained `FileMeta` Index
The original `FileMeta` only stored file-level index information. However, manual rebuilds require column-level details to identify files inconsistent with the current DDL. Therefore, the `indexes` field in `FileMeta` is updated as follows:
```rust
struct FileMeta {
    ...
    // From file-level:
    // available_indexes: SmallVec<[IndexType; 4]>
    // To column-level:
    indexes: Vec<ColumnIndexMetadata>,
    ...
}

pub struct ColumnIndexMetadata {
    pub column_id: ColumnId,
    pub created_indexes: IndexTypes,
}
```
## Process
The index building process is similar to a flush and is illustrated below:
```mermaid
sequenceDiagram
    Region0->>Region0: Triggered by one of 4 conditions, targets specific files
    loop For each target file
        Region0->>IndexBuildScheduler: Submits an index build task
    end
    IndexBuildScheduler->>IndexBuildTask: Executes the task
    IndexBuildTask->>Storage Interfaces: Reads SST data from disk
    IndexBuildTask->>IndexBuildTask: Builds the index file
    alt Index file size > 0
        IndexBuildTask->>Region0: Sends IndexBuildFinished notification
    end
    alt File exists in Version and is not compacting
        Region0->>Storage Interfaces: Updates manifest and Version
    end
```
### Task Triggering and Scheduling
The process starts with one of the four `IndexBuildType` triggers. In `handle_rebuild_index`, the `RegionWorkerLoop` identifies target SSTs from the request or the current region version. It then creates an `IndexBuildTask` for each file and submits it to the `index_build_scheduler`.
Similar to Flush and Compact operations, index build tasks are ultimately dispatched to the LocalScheduler. Resource usage can be adjusted via configuration files. Since asynchronous index tasks are both memory-intensive and IO-intensive but have lower priority, it is recommended to allocate fewer resources to them compared to compaction and flush tasks—for example, limiting them to 1/8 of the CPU cores.
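
As a rough illustration of the 1/8-of-cores suggestion, the snippet below derives a background worker budget from the machine's CPU count. It is a sketch only; the real limit would come from the engine configuration, and the function name is assumed.

```rust
/// Derive a worker budget for async index build tasks from the CPU count,
/// following the 1/8-of-cores recommendation above.
fn index_build_worker_budget() -> usize {
    let cores = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(1);
    // Keep at least one worker so index builds can still make progress
    // on small machines.
    (cores / 8).max(1)
}
```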
### Index Building and Notification
The scheduled `IndexBuildTask` executes its `index_build` method. It uses an `indexer_builder` to create an `Indexer` that reads SST data and builds the index. If a new index file is created (`IndexOutput.file_size > 0`), the task sends an `IndexBuildFinished` notification back to the `RegionWorkerLoop`.
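
The notification itself can be pictured as a small message carrying the produced index metadata. The fields below are assumptions for illustration only; per the rule above, the message is sent only when `IndexOutput.file_size > 0`.

```rust
/// Illustrative sketch of the notification sent back to the region worker
/// loop; field names and types are assumptions, not the engine's actual API.
struct IndexBuildFinished {
    region_id: u64,
    /// The SST file the index was built for.
    file_id: String,
    /// Size of the produced index file; the notification is only sent
    /// when this is greater than zero.
    index_file_size: u64,
}
```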
### Index Metadata Installation
Upon receiving the `IndexBuildFinished` notification in `handle_index_build_finished`, the `RegionWorkerLoop` verifies that the file still exists in the current `version` and is not being compacted. If the check passes, it calls `manifest_ctx.update_manifest` to apply a `RegionEdit` with the new index information, completing the installation.
# Drawbacks
Asynchronous index building may consume extra system resources, potentially affecting overall performance during peak periods.
There may be a delay before the new index becomes available for queries, which could impact certain use cases.
# Unresolved Questions and Future Work
**Resource Management and Throttling**: The resource consumption (CPU, I/O) of background index building can be managed and limited to some extent by configuring a dedicated background thread pool. However, this approach cannot fully eliminate resource contention, especially under heavy workloads or when I/O is highly competitive. Additional throttling mechanisms or dynamic prioritization may still be necessary to avoid impacting foreground operations.
# Alternatives
Instead of being triggered by events like Flush or Compact, index building could be performed in batches during scheduled maintenance windows. This offers predictable resource usage but delays index availability.

File diff suppressed because it is too large.

@@ -87,6 +87,13 @@
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read\|write\|list\|Writer::write\|Writer::close\|Reader::read"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode", error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
# Remote WAL
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Triggered region flush total | `meta_triggered_region_flush_total` | `timeseries` | Triggered region flush total | `prometheus` | `none` | `{{pod}}-{{topic_name}}` |
| Triggered region checkpoint total | `meta_triggered_region_checkpoint_total` | `timeseries` | Triggered region checkpoint total | `prometheus` | `none` | `{{pod}}-{{topic_name}}` |
| Topic estimated replay size | `meta_topic_estimated_replay_size` | `timeseries` | Topic estimated max replay size | `prometheus` | `bytes` | `{{pod}}-{{topic_name}}` |
| Kafka logstore's bytes traffic | `rate(greptime_logstore_kafka_client_bytes_total[$__rate_interval])` | `timeseries` | Kafka logstore's bytes traffic | `prometheus` | `bytes` | `{{pod}}-{{logstore}}` |
# Metasrv
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
@@ -103,6 +110,8 @@
| Meta KV Ops Latency | `histogram_quantile(0.99, sum by(pod, le, op, target) (greptime_meta_kv_request_elapsed_bucket))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `{{pod}}-{{op}} p99` |
| Rate of meta KV Ops | `rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `{{pod}}-{{op}} p99` |
| DDL Latency | `histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `CreateLogicalTables-{{step}} p90` |
| Reconciliation stats | `greptime_meta_reconciliation_stats` | `timeseries` | Reconciliation stats | `prometheus` | `s` | `{{pod}}-{{table_type}}-{{type}}` |
| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
# Flownode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |


@@ -802,6 +802,48 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
- title: Remote WAL
panels:
- title: Triggered region flush total
type: timeseries
description: Triggered region flush total
unit: none
queries:
- expr: meta_triggered_region_flush_total
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{topic_name}}'
- title: Triggered region checkpoint total
type: timeseries
description: Triggered region checkpoint total
unit: none
queries:
- expr: meta_triggered_region_checkpoint_total
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{topic_name}}'
- title: Topic estimated replay size
type: timeseries
description: Topic estimated max replay size
unit: bytes
queries:
- expr: meta_topic_estimated_replay_size
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{topic_name}}'
- title: Kafka logstore's bytes traffic
type: timeseries
description: Kafka logstore's bytes traffic
unit: bytes
queries:
- expr: rate(greptime_logstore_kafka_client_bytes_total[$__rate_interval])
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{logstore}}'
- title: Metasrv
panels:
- title: Region migration datanode
@@ -948,6 +990,26 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: AlterTable-{{step}} p90
- title: Reconciliation stats
type: timeseries
description: Reconciliation stats
unit: s
queries:
- expr: greptime_meta_reconciliation_stats
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{table_type}}-{{type}}'
- title: Reconciliation steps
type: timeseries
description: 'Elapsed of Reconciliation steps '
unit: s
queries:
- expr: histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{procedure_name}}-{{step}}-P90'
- title: Flownode
panels:
- title: Flow Ingest / Output Rate

File diff suppressed because it is too large.

@@ -87,6 +87,13 @@
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read\|write\|list\|Writer::write\|Writer::close\|Reader::read"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{ error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
# Remote WAL
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
| Triggered region flush total | `meta_triggered_region_flush_total` | `timeseries` | Triggered region flush total | `prometheus` | `none` | `{{pod}}-{{topic_name}}` |
| Triggered region checkpoint total | `meta_triggered_region_checkpoint_total` | `timeseries` | Triggered region checkpoint total | `prometheus` | `none` | `{{pod}}-{{topic_name}}` |
| Topic estimated replay size | `meta_topic_estimated_replay_size` | `timeseries` | Topic estimated max replay size | `prometheus` | `bytes` | `{{pod}}-{{topic_name}}` |
| Kafka logstore's bytes traffic | `rate(greptime_logstore_kafka_client_bytes_total[$__rate_interval])` | `timeseries` | Kafka logstore's bytes traffic | `prometheus` | `bytes` | `{{pod}}-{{logstore}}` |
# Metasrv
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |
@@ -103,6 +110,8 @@
| Meta KV Ops Latency | `histogram_quantile(0.99, sum by(pod, le, op, target) (greptime_meta_kv_request_elapsed_bucket))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `{{pod}}-{{op}} p99` |
| Rate of meta KV Ops | `rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `{{pod}}-{{op}} p99` |
| DDL Latency | `histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `CreateLogicalTables-{{step}} p90` |
| Reconciliation stats | `greptime_meta_reconciliation_stats` | `timeseries` | Reconciliation stats | `prometheus` | `s` | `{{pod}}-{{table_type}}-{{type}}` |
| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
# Flownode
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |


@@ -802,6 +802,48 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
- title: Remote WAL
panels:
- title: Triggered region flush total
type: timeseries
description: Triggered region flush total
unit: none
queries:
- expr: meta_triggered_region_flush_total
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{topic_name}}'
- title: Triggered region checkpoint total
type: timeseries
description: Triggered region checkpoint total
unit: none
queries:
- expr: meta_triggered_region_checkpoint_total
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{topic_name}}'
- title: Topic estimated replay size
type: timeseries
description: Topic estimated max replay size
unit: bytes
queries:
- expr: meta_topic_estimated_replay_size
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{topic_name}}'
- title: Kafka logstore's bytes traffic
type: timeseries
description: Kafka logstore's bytes traffic
unit: bytes
queries:
- expr: rate(greptime_logstore_kafka_client_bytes_total[$__rate_interval])
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{logstore}}'
- title: Metasrv
panels:
- title: Region migration datanode
@@ -948,6 +990,26 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: AlterTable-{{step}} p90
- title: Reconciliation stats
type: timeseries
description: Reconciliation stats
unit: s
queries:
- expr: greptime_meta_reconciliation_stats
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{pod}}-{{table_type}}-{{type}}'
- title: Reconciliation steps
type: timeseries
description: 'Elapsed of Reconciliation steps '
unit: s
queries:
- expr: histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '{{procedure_name}}-{{step}}-P90'
- title: Flownode
panels:
- title: Flow Ingest / Output Rate

scripts/fix-udeps.py (new executable file)

@@ -0,0 +1,265 @@
# Copyright 2023 Greptime Team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import re
import sys


def load_udeps_report(report_path):
    try:
        with open(report_path, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: Report file '{report_path}' not found.")
        return None
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON in report file: {e}")
        return None


def extract_unused_dependencies(report):
    """
    Extract and organize unused dependencies from the cargo-udeps JSON report.

    The cargo-udeps report has this structure:
    {
        "unused_deps": {
            "package_name v0.1.0 (/path/to/package)": {
                "normal": ["dep1", "dep2"],
                "development": ["dev_dep1"],
                "build": ["build_dep1"],
                "manifest_path": "/path/to/Cargo.toml"
            }
        }
    }

    Args:
        report (dict): The parsed JSON report from cargo-udeps

    Returns:
        dict: Organized unused dependencies by package name:
        {
            "package_name": {
                "dependencies": [("dep1", "normal"), ("dev_dep1", "dev")],
                "manifest_path": "/path/to/Cargo.toml"
            }
        }
    """
    if not report or "unused_deps" not in report:
        return {}
    unused_deps = {}
    for package_full_name, deps_info in report["unused_deps"].items():
        package_name = package_full_name.split(" ")[0]
        all_unused = []
        if deps_info.get("normal"):
            all_unused.extend([(dep, "normal") for dep in deps_info["normal"]])
        if deps_info.get("development"):
            all_unused.extend([(dep, "dev") for dep in deps_info["development"]])
        if deps_info.get("build"):
            all_unused.extend([(dep, "build") for dep in deps_info["build"]])
        if all_unused:
            unused_deps[package_name] = {
                "dependencies": all_unused,
                "manifest_path": deps_info.get("manifest_path", "unknown"),
            }
    return unused_deps


def get_section_pattern(dep_type):
    """
    Get regex patterns to identify different dependency sections in Cargo.toml.

    Args:
        dep_type (str): Type of dependency ("normal", "dev", or "build")

    Returns:
        list: List of regex patterns to match the appropriate section headers
    """
    patterns = {
        "normal": [r"\[dependencies\]", r"\[dependencies\..*?\]"],
        "dev": [r"\[dev-dependencies\]", r"\[dev-dependencies\..*?\]"],
        "build": [r"\[build-dependencies\]", r"\[build-dependencies\..*?\]"],
    }
    return patterns.get(dep_type, [])


def remove_dependency_line(content, dep_name, section_start, section_end):
    """
    Remove a dependency line from a specific section of a Cargo.toml file.

    Args:
        content (str): The entire content of the Cargo.toml file
        dep_name (str): Name of the dependency to remove (e.g., "serde", "tokio")
        section_start (int): Starting position of the section in the content
        section_end (int): Ending position of the section in the content

    Returns:
        tuple: (new_content, removed) where:
            - new_content (str): The modified content with dependency removed
            - removed (bool): True if dependency was found and removed, False otherwise

    Example input content format:
        content = '''
        [package]
        name = "my-crate"
        version = "0.1.0"

        [dependencies]
        serde = "1.0"
        tokio = { version = "1.0", features = ["full"] }
        serde_json.workspace = true

        [dev-dependencies]
        tempfile = "3.0"
        '''

    # If dep_name = "serde", section_start = start of [dependencies],
    # section_end = start of [dev-dependencies], this function will:
    # 1. Extract the section: "serde = "1.0""\ntokio = { version = "1.0", features = ["full"] }\nserde_json.workspace = true\n"
    # 2. Find and remove the line: "serde = "1.0""
    # 3. Return the modified content with that line removed
    """
    section_content = content[section_start:section_end]
    dep_patterns = [
        rf"^{re.escape(dep_name)}\s*=.*$",  # e.g., "serde = "1.0""
        rf"^{re.escape(dep_name)}\.workspace\s*=.*$",  # e.g., "serde_json.workspace = true"
    ]
    for pattern in dep_patterns:
        match = re.search(pattern, section_content, re.MULTILINE)
        if match:
            line_start = section_start + match.start()  # Start of the matched line
            line_end = section_start + match.end()  # End of the matched line
            if line_end < len(content) and content[line_end] == "\n":
                line_end += 1
            return content[:line_start] + content[line_end:], True
    return content, False


def remove_dependency_from_toml(file_path, dep_name, dep_type):
    """
    Remove a specific dependency from a Cargo.toml file.

    Args:
        file_path (str): Path to the Cargo.toml file
        dep_name (str): Name of the dependency to remove
        dep_type (str): Type of dependency ("normal", "dev", or "build")

    Returns:
        bool: True if dependency was successfully removed, False otherwise
    """
    try:
        with open(file_path, "r") as f:
            content = f.read()
        section_patterns = get_section_pattern(dep_type)
        if not section_patterns:
            return False
        for pattern in section_patterns:
            section_match = re.search(pattern, content, re.IGNORECASE)
            if not section_match:
                continue
            section_start = section_match.end()
            next_section = re.search(r"\n\s*\[", content[section_start:])
            section_end = (
                section_start + next_section.start() if next_section else len(content)
            )
            new_content, removed = remove_dependency_line(
                content, dep_name, section_start, section_end
            )
            if removed:
                with open(file_path, "w") as f:
                    f.write(new_content)
                return True
        return False
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return False


def process_unused_dependencies(unused_deps):
    """
    Process and remove all unused dependencies from their respective Cargo.toml files.

    Args:
        unused_deps (dict): Dictionary of unused dependencies organized by package:
        {
            "package_name": {
                "dependencies": [("dep1", "normal"), ("dev_dep1", "dev")],
                "manifest_path": "/path/to/Cargo.toml"
            }
        }
    """
    if not unused_deps:
print("No unused dependencies found.")
return
total_removed = 0
total_failed = 0
for package, info in unused_deps.items():
deps = info["dependencies"]
manifest_path = info["manifest_path"]
if not os.path.exists(manifest_path):
print(f"Manifest file not found: {manifest_path}")
total_failed += len(deps)
continue
for dep, dep_type in deps:
if remove_dependency_from_toml(manifest_path, dep, dep_type):
print(f"Removed {dep} from {package}")
total_removed += 1
else:
print(f"Failed to remove {dep} from {package}")
total_failed += 1
print(f"Removed {total_removed} dependencies")
if total_failed > 0:
print(f"Failed to remove {total_failed} dependencies")
def main():
if len(sys.argv) > 1:
report_path = sys.argv[1]
else:
report_path = "udeps-report.json"
report = load_udeps_report(report_path)
if report is None:
sys.exit(1)
unused_deps = extract_unused_dependencies(report)
process_unused_dependencies(unused_deps)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,71 @@
#!/bin/bash
# Generate TLS certificates for etcd testing
# This script creates certificates for TLS-enabled etcd in testing environments
set -euo pipefail
CERT_DIR="${1:-$(dirname "$0")/../tests-integration/fixtures/etcd-tls-certs}"
DAYS="${2:-365}"
echo "Generating TLS certificates for etcd in ${CERT_DIR}..."
mkdir -p "${CERT_DIR}"
cd "${CERT_DIR}"
echo "Generating CA private key..."
openssl genrsa -out ca-key.pem 2048
echo "Generating CA certificate..."
openssl req -new -x509 -key ca-key.pem -out ca.crt -days "${DAYS}" \
-subj "/C=US/ST=CA/L=SF/O=Greptime/CN=etcd-ca"
# Create server certificate config with Subject Alternative Names
echo "Creating server certificate configuration..."
cat > server.conf << 'EOF'
[req]
distinguished_name = req
[v3_req]
basicConstraints = CA:FALSE
keyUsage = keyEncipherment, dataEncipherment
subjectAltName = @alt_names
[alt_names]
DNS.1 = localhost
DNS.2 = etcd-tls
DNS.3 = 127.0.0.1
IP.1 = 127.0.0.1
IP.2 = ::1
EOF
echo "Generating server private key..."
openssl genrsa -out server-key.pem 2048
echo "Generating server certificate signing request..."
openssl req -new -key server-key.pem -out server.csr \
-subj "/CN=etcd-tls"
echo "Generating server certificate..."
openssl x509 -req -in server.csr -CA ca.crt \
-CAkey ca-key.pem -CAcreateserial -out server.crt \
-days "${DAYS}" -extensions v3_req -extfile server.conf
echo "Generating client private key..."
openssl genrsa -out client-key.pem 2048
echo "Generating client certificate signing request..."
openssl req -new -key client-key.pem -out client.csr \
-subj "/CN=etcd-client"
echo "Generating client certificate..."
openssl x509 -req -in client.csr -CA ca.crt \
-CAkey ca-key.pem -CAcreateserial -out client.crt \
-days "${DAYS}"
echo "Setting proper file permissions..."
chmod 644 ca.crt server.crt client.crt
chmod 600 ca-key.pem server-key.pem client-key.pem
# Clean up intermediate files
rm -f server.csr client.csr server.conf
echo "TLS certificates generated successfully in ${CERT_DIR}"

View File

@@ -32,6 +32,7 @@ pub enum PermissionReq<'a> {
PromStoreRead,
Otlp,
LogWrite,
BulkInsert,
}
#[derive(Debug)]

View File

@@ -30,8 +30,7 @@ use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{
StringVectorBuilder, TimestampMicrosecondVectorBuilder, UInt32VectorBuilder,
UInt64VectorBuilder,
StringVectorBuilder, TimestampSecondVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder,
};
use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt};
@@ -107,17 +106,17 @@ impl InformationSchemaTables {
ColumnSchema::new(AUTO_INCREMENT, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(
CREATE_TIME,
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_second_datatype(),
true,
),
ColumnSchema::new(
UPDATE_TIME,
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_second_datatype(),
true,
),
ColumnSchema::new(
CHECK_TIME,
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_second_datatype(),
true,
),
ColumnSchema::new(TABLE_COLLATION, ConcreteDataType::string_datatype(), true),
@@ -194,9 +193,9 @@ struct InformationSchemaTablesBuilder {
max_index_length: UInt64VectorBuilder,
data_free: UInt64VectorBuilder,
auto_increment: UInt64VectorBuilder,
create_time: TimestampMicrosecondVectorBuilder,
update_time: TimestampMicrosecondVectorBuilder,
check_time: TimestampMicrosecondVectorBuilder,
create_time: TimestampSecondVectorBuilder,
update_time: TimestampSecondVectorBuilder,
check_time: TimestampSecondVectorBuilder,
table_collation: StringVectorBuilder,
checksum: UInt64VectorBuilder,
create_options: StringVectorBuilder,
@@ -231,9 +230,9 @@ impl InformationSchemaTablesBuilder {
max_index_length: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
data_free: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
auto_increment: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
create_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
update_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
check_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
create_time: TimestampSecondVectorBuilder::with_capacity(INIT_CAPACITY),
update_time: TimestampSecondVectorBuilder::with_capacity(INIT_CAPACITY),
check_time: TimestampSecondVectorBuilder::with_capacity(INIT_CAPACITY),
table_collation: StringVectorBuilder::with_capacity(INIT_CAPACITY),
checksum: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
create_options: StringVectorBuilder::with_capacity(INIT_CAPACITY),
@@ -380,7 +379,7 @@ impl InformationSchemaTablesBuilder {
self.create_options
.push(Some(table_info.meta.options.to_string().as_ref()));
self.create_time
.push(Some(table_info.meta.created_on.timestamp_millis().into()));
.push(Some(table_info.meta.created_on.timestamp().into()));
self.temporary
.push(if matches!(table_type, TableType::Temporary) {
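The hunk above narrows the information_schema `tables` time columns to second precision and switches the pushed value from `timestamp_millis()` to `timestamp()`. A minimal standalone sketch of the unit mismatch this avoids, assuming only the `chrono` crate; the date below is made up.

```rust
use chrono::{TimeZone, Utc};

fn main() {
    // Hypothetical creation time, standing in for `table_info.meta.created_on`.
    let created_on = Utc.with_ymd_and_hms(2025, 9, 17, 16, 42, 28).unwrap();

    // A second-precision column expects whole seconds since the epoch...
    let seconds = created_on.timestamp();
    // ...while `timestamp_millis()` is a factor of 1000 larger, so pushing it into a
    // second-precision builder would place the time far in the future.
    let millis = created_on.timestamp_millis();

    assert_eq!(millis, seconds * 1000);
    println!("seconds since epoch: {seconds}, milliseconds: {millis}");
}
```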

View File

@@ -66,6 +66,9 @@ pub struct BenchTableMetadataCommand {
#[cfg(feature = "pg_kvbackend")]
#[clap(long)]
postgres_addr: Option<String>,
#[cfg(feature = "pg_kvbackend")]
#[clap(long)]
postgres_schema: Option<String>,
#[cfg(feature = "mysql_kvbackend")]
#[clap(long)]
mysql_addr: Option<String>,

View File

@@ -19,8 +19,9 @@ use common_error::ext::BoxedError;
use common_meta::kv_backend::chroot::ChrootKvBackend;
use common_meta::kv_backend::etcd::EtcdStore;
use common_meta::kv_backend::KvBackendRef;
use meta_srv::bootstrap::create_etcd_client;
use meta_srv::bootstrap::create_etcd_client_with_tls;
use meta_srv::metasrv::BackendImpl;
use servers::tls::{TlsMode, TlsOption};
use crate::error::{EmptyStoreAddrsSnafu, UnsupportedMemoryBackendSnafu};
@@ -55,9 +56,47 @@ pub(crate) struct StoreConfig {
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
#[clap(long, default_value = common_meta::kv_backend::DEFAULT_META_TABLE_NAME)]
meta_table_name: String,
/// Optional PostgreSQL schema for metadata table (defaults to current search_path if unset).
#[cfg(feature = "pg_kvbackend")]
#[clap(long)]
meta_schema_name: Option<String>,
/// TLS mode for backend store connections (etcd, PostgreSQL, MySQL)
#[clap(long = "backend-tls-mode", value_enum, default_value = "disable")]
backend_tls_mode: TlsMode,
/// Path to TLS certificate file for backend store connections
#[clap(long = "backend-tls-cert-path", default_value = "")]
backend_tls_cert_path: String,
/// Path to TLS private key file for backend store connections
#[clap(long = "backend-tls-key-path", default_value = "")]
backend_tls_key_path: String,
/// Path to TLS CA certificate file for backend store connections
#[clap(long = "backend-tls-ca-cert-path", default_value = "")]
backend_tls_ca_cert_path: String,
/// Enable watching TLS certificate files for changes
#[clap(long = "backend-tls-watch")]
backend_tls_watch: bool,
}
impl StoreConfig {
pub fn tls_config(&self) -> Option<TlsOption> {
if self.backend_tls_mode != TlsMode::Disable {
Some(TlsOption {
mode: self.backend_tls_mode.clone(),
cert_path: self.backend_tls_cert_path.clone(),
key_path: self.backend_tls_key_path.clone(),
ca_cert_path: self.backend_tls_ca_cert_path.clone(),
watch: self.backend_tls_watch,
})
} else {
None
}
}
/// Builds a [`KvBackendRef`] from the store configuration.
pub async fn build(&self) -> Result<KvBackendRef, BoxedError> {
let max_txn_ops = self.max_txn_ops;
@@ -67,7 +106,8 @@ impl StoreConfig {
} else {
let kvbackend = match self.backend {
BackendImpl::EtcdStore => {
let etcd_client = create_etcd_client(store_addrs)
let tls_config = self.tls_config();
let etcd_client = create_etcd_client_with_tls(store_addrs, tls_config.as_ref())
.await
.map_err(BoxedError::new)?;
Ok(EtcdStore::with_etcd_client(etcd_client, max_txn_ops))
@@ -75,11 +115,14 @@ impl StoreConfig {
#[cfg(feature = "pg_kvbackend")]
BackendImpl::PostgresStore => {
let table_name = &self.meta_table_name;
let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs, None)
let tls_config = self.tls_config();
let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs, tls_config)
.await
.map_err(BoxedError::new)?;
let schema_name = self.meta_schema_name.as_deref();
Ok(common_meta::kv_backend::rds::PgStore::with_pg_pool(
pool,
schema_name,
table_name,
max_txn_ops,
)
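The new `--backend-tls-*` flags only take effect when the mode is not `disable`. A minimal sketch of that mapping with local stand-ins for `servers::tls::{TlsMode, TlsOption}`; the extra variant names and paths here are illustrative only.

```rust
// Local stand-ins; only the fields relevant to the sketch are mirrored.
#[derive(Clone, Debug, PartialEq)]
enum TlsMode { Disable, Require, VerifyCa }

#[derive(Clone, Debug)]
struct TlsOption { mode: TlsMode, ca_cert_path: String, watch: bool }

// Mirrors `StoreConfig::tls_config` above: only hand a TLS option to the backend
// client builders when TLS is enabled, so existing plaintext deployments keep
// passing `None` exactly as before.
fn tls_config(mode: TlsMode, ca_cert_path: &str, watch: bool) -> Option<TlsOption> {
    (mode != TlsMode::Disable).then(|| TlsOption {
        mode: mode.clone(),
        ca_cert_path: ca_cert_path.to_string(),
        watch,
    })
}

fn main() {
    assert!(tls_config(TlsMode::Disable, "", false).is_none());
    assert!(tls_config(TlsMode::VerifyCa, "/certs/ca.crt", false).is_some());
}
```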

View File

@@ -473,8 +473,8 @@ impl Database {
}) = &self.ctx.auth_header
{
let encoded = BASE64_STANDARD.encode(format!("{username}:{password}"));
let value =
MetadataValue::from_str(&encoded).context(InvalidTonicMetadataValueSnafu)?;
let value = MetadataValue::from_str(&format!("Basic {encoded}"))
.context(InvalidTonicMetadataValueSnafu)?;
request.metadata_mut().insert("x-greptime-auth", value);
}
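The fix prepends the `Basic ` scheme to the base64-encoded credentials before setting the `x-greptime-auth` metadata value. A small sketch assuming the `base64` crate's standard engine; the username and password are made up.

```rust
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use base64::Engine as _;

fn auth_header(username: &str, password: &str) -> String {
    let encoded = BASE64_STANDARD.encode(format!("{username}:{password}"));
    // Previously only the raw base64 was sent; the server side expects the
    // "Basic " scheme prefix in front of the credentials.
    format!("Basic {encoded}")
}

fn main() {
    assert_eq!(
        auth_header("greptime_user", "greptime_pwd"),
        "Basic Z3JlcHRpbWVfdXNlcjpncmVwdGltZV9wd2Q="
    );
}
```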

View File

@@ -16,7 +16,7 @@ use std::time::Duration;
use api::v1::RowInsertRequests;
use humantime::format_duration;
use store_api::mito_engine_options::{APPEND_MODE_KEY, TTL_KEY};
use store_api::mito_engine_options::{APPEND_MODE_KEY, TTL_KEY, TWCS_TIME_WINDOW};
use crate::error::Result;
@@ -35,15 +35,23 @@ pub struct InsertOptions {
pub ttl: Duration,
/// Whether to use append mode for the insert.
pub append_mode: bool,
/// Time window for twcs compaction.
pub twcs_compaction_time_window: Option<Duration>,
}
impl InsertOptions {
/// Converts the insert options to a list of key-value string hints.
pub fn to_hints(&self) -> Vec<(&'static str, String)> {
vec![
let mut hints = vec![
(TTL_KEY, format_duration(self.ttl).to_string()),
(APPEND_MODE_KEY, self.append_mode.to_string()),
]
];
if let Some(time_window) = self.twcs_compaction_time_window {
hints.push((TWCS_TIME_WINDOW, format_duration(time_window).to_string()));
}
hints
}
}
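A sketch of the hints this produces, using `humantime::format_duration` as the diff does; the literal key strings below are illustrative stand-ins for the `store_api::mito_engine_options` constants, not values taken from the crate.

```rust
use std::time::Duration;
use humantime::format_duration;

// Illustrative stand-ins for TTL_KEY, APPEND_MODE_KEY and TWCS_TIME_WINDOW.
const TTL_KEY: &str = "ttl";
const APPEND_MODE_KEY: &str = "append_mode";
const TWCS_TIME_WINDOW: &str = "compaction.twcs.time_window";

struct InsertOptions {
    ttl: Duration,
    append_mode: bool,
    twcs_compaction_time_window: Option<Duration>,
}

impl InsertOptions {
    fn to_hints(&self) -> Vec<(&'static str, String)> {
        let mut hints = vec![
            (TTL_KEY, format_duration(self.ttl).to_string()),
            (APPEND_MODE_KEY, self.append_mode.to_string()),
        ];
        // The time window hint is only attached when explicitly configured, so
        // callers that leave it unset produce the same hints as before the change.
        if let Some(window) = self.twcs_compaction_time_window {
            hints.push((TWCS_TIME_WINDOW, format_duration(window).to_string()));
        }
        hints
    }
}

fn main() {
    let opts = InsertOptions {
        ttl: Duration::from_secs(90 * 24 * 3600),
        append_mode: true,
        twcs_compaction_time_window: Some(Duration::from_secs(24 * 3600)),
    };
    // Prints the ttl, append_mode and twcs time window hints as key/value pairs.
    println!("{:?}", opts.to_hints());
}
```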

View File

@@ -103,3 +103,6 @@ tempfile.workspace = true
[target.'cfg(not(windows))'.dev-dependencies]
rexpect = "0.5"
[package.metadata.cargo-udeps.ignore]
development = ["rexpect"]

View File

@@ -41,6 +41,7 @@ use frontend::server::Services;
use meta_client::{MetaClientOptions, MetaClientType};
use servers::addrs;
use servers::export_metrics::ExportMetricsTask;
use servers::grpc::GrpcOptions;
use servers::tls::{TlsMode, TlsOption};
use snafu::{OptionExt, ResultExt};
use tracing_appender::non_blocking::WorkerGuard;
@@ -144,6 +145,14 @@ pub struct StartCommand {
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
#[clap(long, alias = "rpc-hostname")]
rpc_server_addr: Option<String>,
/// The address to bind the internal gRPC server.
#[clap(long, alias = "internal-rpc-addr")]
internal_rpc_bind_addr: Option<String>,
/// The address advertised to the metasrv, and used for connections from outside the host.
/// If left empty or unset, the server will automatically use the IP address of the first network interface
/// on the host, with the same port number as the one specified in `internal_rpc_bind_addr`.
#[clap(long, alias = "internal-rpc-hostname")]
internal_rpc_server_addr: Option<String>,
#[clap(long)]
http_addr: Option<String>,
#[clap(long)]
@@ -241,6 +250,31 @@ impl StartCommand {
opts.grpc.server_addr.clone_from(addr);
}
if let Some(addr) = &self.internal_rpc_bind_addr {
if let Some(internal_grpc) = &mut opts.internal_grpc {
internal_grpc.bind_addr = addr.to_string();
} else {
let grpc_options = GrpcOptions {
bind_addr: addr.to_string(),
..Default::default()
};
opts.internal_grpc = Some(grpc_options);
}
}
if let Some(addr) = &self.internal_rpc_server_addr {
if let Some(internal_grpc) = &mut opts.internal_grpc {
internal_grpc.server_addr = addr.to_string();
} else {
let grpc_options = GrpcOptions {
server_addr: addr.to_string(),
..Default::default()
};
opts.internal_grpc = Some(grpc_options);
}
}
if let Some(addr) = &self.mysql_addr {
opts.mysql.enable = true;
opts.mysql.addr.clone_from(addr);
@@ -448,6 +482,8 @@ mod tests {
http_addr: Some("127.0.0.1:1234".to_string()),
mysql_addr: Some("127.0.0.1:5678".to_string()),
postgres_addr: Some("127.0.0.1:5432".to_string()),
internal_rpc_bind_addr: Some("127.0.0.1:4010".to_string()),
internal_rpc_server_addr: Some("10.0.0.24:4010".to_string()),
influxdb_enable: Some(false),
disable_dashboard: Some(false),
..Default::default()
@@ -460,6 +496,10 @@ mod tests {
assert_eq!(opts.mysql.addr, "127.0.0.1:5678");
assert_eq!(opts.postgres.addr, "127.0.0.1:5432");
let internal_grpc = opts.internal_grpc.as_ref().unwrap();
assert_eq!(internal_grpc.bind_addr, "127.0.0.1:4010");
assert_eq!(internal_grpc.server_addr, "10.0.0.24:4010");
let default_opts = FrontendOptions::default().component;
assert_eq!(opts.grpc.bind_addr, default_opts.grpc.bind_addr);

View File

@@ -146,6 +146,7 @@ fn test_load_frontend_example_config() {
grpc: GrpcOptions::default()
.with_bind_addr("127.0.0.1:4001")
.with_server_addr("127.0.0.1:4001"),
internal_grpc: Some(GrpcOptions::internal_default()),
http: HttpOptions {
cors_allowed_origins: vec!["https://example.com".to_string()],
..Default::default()
@@ -198,6 +199,7 @@ fn test_load_metasrv_example_config() {
ca_cert_path: String::new(),
watch: false,
}),
meta_schema_name: Some("greptime_schema".to_string()),
..Default::default()
},
..Default::default()

View File

@@ -196,7 +196,10 @@ pub async fn stream_to_parquet(
concurrency: usize,
) -> Result<usize> {
let write_props = column_wise_config(
WriterProperties::builder().set_compression(Compression::ZSTD(ZstdLevel::default())),
WriterProperties::builder()
.set_compression(Compression::ZSTD(ZstdLevel::default()))
.set_statistics_truncate_length(None)
.set_column_index_truncate_length(None),
schema,
)
.build();
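A minimal sketch of building writer properties with both truncation knobs disabled, assuming the `parquet` crate version used by the repository exposes the same builder methods as the diff; compression and column-wise tuning are omitted for brevity.

```rust
use parquet::file::properties::WriterProperties;

fn writer_props() -> WriterProperties {
    WriterProperties::builder()
        // Keep full min/max statistics and column-index values instead of any
        // default truncation, which matters when statistics are compared byte-wise.
        .set_statistics_truncate_length(None)
        .set_column_index_truncate_length(None)
        .build()
}

fn main() {
    let _props = writer_props();
}
```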

View File

@@ -251,7 +251,6 @@ macro_rules! define_from_tonic_status {
.get(key)
.and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
}
let code = metadata_value(&e, $crate::GREPTIME_DB_HEADER_ERROR_CODE)
.and_then(|s| {
if let Ok(code) = s.parse::<u32>() {
@@ -290,6 +289,8 @@ macro_rules! define_into_tonic_status {
use tonic::metadata::MetadataMap;
use $crate::GREPTIME_DB_HEADER_ERROR_CODE;
common_telemetry::error!(err; "Failed to handle request");
let mut headers = HeaderMap::<HeaderValue>::with_capacity(2);
// If either of the status_code or error msg cannot convert to valid HTTP header value

View File

@@ -56,6 +56,8 @@ pub type EventRecorderRef = Arc<dyn EventRecorder>;
pub const DEFAULT_FLUSH_INTERVAL_SECONDS: Duration = Duration::from_secs(5);
/// The default TTL(90 days) for the events table.
const DEFAULT_EVENTS_TABLE_TTL: Duration = Duration::from_days(90);
/// The default compaction time window for the events table.
pub const DEFAULT_COMPACTION_TIME_WINDOW: Duration = Duration::from_days(1);
// The capacity of the tokio channel for transmitting events to background processor.
const DEFAULT_CHANNEL_SIZE: usize = 2048;
// The size of the buffer for batching events before flushing to event handler.

View File

@@ -57,7 +57,6 @@ serde_json.workspace = true
session.workspace = true
snafu.workspace = true
sql.workspace = true
statrs = "0.16"
store-api.workspace = true
table.workspace = true
uddsketch = { git = "https://github.com/GreptimeTeam/timescaledb-toolkit.git", rev = "84828fe8fb494a6a61412a3da96517fc80f7bb20" }

View File

@@ -34,6 +34,33 @@ pub struct ClampFunction;
const CLAMP_NAME: &str = "clamp";
/// Ensure the vector is constant and not empty (i.e., all values are identical)
fn ensure_constant_vector(vector: &VectorRef) -> Result<()> {
ensure!(
!vector.is_empty(),
InvalidFuncArgsSnafu {
err_msg: "Expect at least one value",
}
);
if vector.is_const() {
return Ok(());
}
let first = vector.get_ref(0);
for i in 1..vector.len() {
let v = vector.get_ref(i);
if first != v {
return InvalidFuncArgsSnafu {
err_msg: "All values in min/max argument must be identical",
}
.fail();
}
}
Ok(())
}
impl Function for ClampFunction {
fn name(&self) -> &str {
CLAMP_NAME
@@ -80,16 +107,9 @@ impl Function for ClampFunction {
),
}
);
ensure!(
(columns[1].len() == 1 || columns[1].is_const())
&& (columns[2].len() == 1 || columns[2].is_const()),
InvalidFuncArgsSnafu {
err_msg: format!(
"The second and third args should be scalar, have: {:?}, {:?}",
columns[1], columns[2]
),
}
);
ensure_constant_vector(&columns[1])?;
ensure_constant_vector(&columns[2])?;
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
let input_array = columns[0].to_arrow_array();
@@ -204,15 +224,8 @@ impl Function for ClampMinFunction {
),
}
);
ensure!(
columns[1].len() == 1 || columns[1].is_const(),
InvalidFuncArgsSnafu {
err_msg: format!(
"The second arg (min) should be scalar, have: {:?}",
columns[1]
),
}
);
ensure_constant_vector(&columns[1])?;
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
let input_array = columns[0].to_arrow_array();
@@ -292,15 +305,8 @@ impl Function for ClampMaxFunction {
),
}
);
ensure!(
columns[1].len() == 1 || columns[1].is_const(),
InvalidFuncArgsSnafu {
err_msg: format!(
"The second arg (max) should be scalar, have: {:?}",
columns[1]
),
}
);
ensure_constant_vector(&columns[1])?;
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
let input_array = columns[0].to_arrow_array();
@@ -537,8 +543,8 @@ mod test {
let func = ClampFunction;
let args = [
Arc::new(Float64Vector::from(input)) as _,
Arc::new(Float64Vector::from_vec(vec![min, min])) as _,
Arc::new(Float64Vector::from_vec(vec![max])) as _,
Arc::new(Float64Vector::from_vec(vec![min, max])) as _,
Arc::new(Float64Vector::from_vec(vec![max, min])) as _,
];
let result = func.eval(&FunctionContext::default(), args.as_slice());
assert!(result.is_err());
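The rejected case in the test above (a min/max argument whose values differ across rows) is what `ensure_constant_vector` guards against. A standalone sketch of the same invariant over a plain slice instead of the internal `VectorRef` type.

```rust
fn ensure_constant<T: PartialEq>(values: &[T]) -> Result<(), String> {
    let first = values
        .first()
        .ok_or_else(|| "Expect at least one value".to_string())?;
    // Constant vectors are accepted as-is in the real code; here every element must
    // simply equal the first one.
    if values.iter().any(|v| v != first) {
        return Err("All values in min/max argument must be identical".to_string());
    }
    Ok(())
}

fn main() {
    // The broadcast min/max argument may repeat the same value for every row...
    assert!(ensure_constant(&[5.0, 5.0, 5.0]).is_ok());
    // ...but mixing values, as in the rejected test case above, is an error.
    assert!(ensure_constant(&[1.0, 10.0]).is_err());
}
```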

View File

@@ -16,15 +16,12 @@ use std::any::Any;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;
use common_query::error::FromScalarValueSnafu;
use common_query::prelude::ColumnarValue;
use datafusion::logical_expr::{ScalarFunctionArgs, ScalarUDFImpl};
use datafusion_expr::ScalarUDF;
use datatypes::data_type::DataType;
use datatypes::prelude::*;
use datatypes::vectors::Helper;
use session::context::QueryContextRef;
use snafu::ResultExt;
use crate::function::{FunctionContext, FunctionRef};
use crate::state::FunctionState;
@@ -76,13 +73,7 @@ impl ScalarUDFImpl for ScalarUdf {
let columns = args
.args
.iter()
.map(|x| {
ColumnarValue::try_from(x).and_then(|y| match y {
ColumnarValue::Vector(z) => Ok(z),
ColumnarValue::Scalar(z) => Helper::try_from_scalar_value(z, args.number_rows)
.context(FromScalarValueSnafu),
})
})
.map(|x| ColumnarValue::try_from(x).and_then(|y| y.try_into_vector(args.number_rows)))
.collect::<common_query::error::Result<Vec<_>>>()?;
let v = self
.function

View File

@@ -21,8 +21,3 @@ syn = { version = "2.0", features = [
] }
[dev-dependencies]
arc-swap = "1.0"
common-query.workspace = true
datatypes.workspace = true
snafu.workspace = true
static_assertions = "1.1.0"

View File

@@ -94,5 +94,4 @@ common-procedure = { workspace = true, features = ["testing"] }
common-test-util.workspace = true
common-wal = { workspace = true, features = ["testing"] }
datatypes.workspace = true
hyper = { version = "0.14", features = ["full"] }
uuid.workspace = true

View File

@@ -242,6 +242,7 @@ mod tests {
flow_name: "my_flow".to_string(),
raw_sql: "sql".to_string(),
expire_after: Some(300),
eval_interval_secs: None,
comment: "comment".to_string(),
options: Default::default(),
created_time: chrono::Utc::now(),

View File

@@ -50,6 +50,8 @@ use crate::rpc::router::RegionRoute;
pub struct AlterLogicalTablesProcedure {
pub context: DdlContext,
pub data: AlterTablesData,
/// Physical table route cache.
pub physical_table_route: Option<PhysicalTableRouteValue>,
}
/// Builds the validator from the [`AlterTablesData`].
@@ -93,16 +95,20 @@ impl AlterLogicalTablesProcedure {
table_info_values: vec![],
physical_table_id,
physical_table_info: None,
physical_table_route: None,
physical_columns: vec![],
table_cache_keys_to_invalidate: vec![],
},
physical_table_route: None,
}
}
pub fn from_json(json: &str, context: DdlContext) -> ProcedureResult<Self> {
let data = serde_json::from_str(json).context(FromJsonSnafu)?;
Ok(Self { context, data })
Ok(Self {
context,
data,
physical_table_route: None,
})
}
pub(crate) async fn on_prepare(&mut self) -> Result<Status> {
@@ -141,21 +147,24 @@ impl AlterLogicalTablesProcedure {
// Updates the procedure state.
retain_unskipped(&mut self.data.tasks, &skip_alter);
self.data.physical_table_info = Some(physical_table_info);
self.data.physical_table_route = Some(physical_table_route);
self.data.table_info_values = table_info_values;
debug_assert_eq!(self.data.tasks.len(), self.data.table_info_values.len());
self.physical_table_route = Some(physical_table_route);
self.data.state = AlterTablesState::SubmitAlterRegionRequests;
Ok(Status::executing(true))
}
pub(crate) async fn on_submit_alter_region_requests(&mut self) -> Result<Status> {
// Safety: we have checked the state in on_prepare
let physical_table_route = &self.data.physical_table_route.as_ref().unwrap();
self.fetch_physical_table_route_if_non_exist().await?;
// Safety: fetched in `fetch_physical_table_route_if_non_exist`.
let region_routes = &self.physical_table_route.as_ref().unwrap().region_routes;
let executor = build_executor_from_alter_expr(&self.data);
let mut results = executor
.on_alter_regions(
&self.context.node_manager,
&physical_table_route.region_routes,
// Avoid double-borrowing self by extracting the region_routes first
region_routes,
)
.await?;
@@ -166,7 +175,7 @@ impl AlterLogicalTablesProcedure {
} else {
warn!("altering logical table result doesn't contains extension key `{ALTER_PHYSICAL_EXTENSION_KEY}`,leaving the physical table's schema unchanged");
}
self.submit_sync_region_requests(results, &physical_table_route.region_routes)
self.submit_sync_region_requests(results, region_routes)
.await;
self.data.state = AlterTablesState::UpdateMetadata;
Ok(Status::executing(true))
@@ -232,6 +241,21 @@ impl AlterLogicalTablesProcedure {
.await?;
Ok(Status::done())
}
/// Fetches the physical table route if it is not already fetched.
async fn fetch_physical_table_route_if_non_exist(&mut self) -> Result<()> {
if self.physical_table_route.is_none() {
let (_, physical_table_route) = self
.context
.table_metadata_manager
.table_route_manager()
.get_physical_table_route(self.data.physical_table_id)
.await?;
self.physical_table_route = Some(physical_table_route);
}
Ok(())
}
}
#[async_trait]
@@ -261,6 +285,10 @@ impl Procedure for AlterLogicalTablesProcedure {
AlterTablesState::UpdateMetadata => self.on_update_metadata().await,
AlterTablesState::InvalidateTableCache => self.on_invalidate_table_cache().await,
}
.inspect_err(|_| {
// Reset the physical table route cache.
self.physical_table_route = None;
})
.map_err(map_to_procedure_error)
}
@@ -298,7 +326,6 @@ pub struct AlterTablesData {
/// Physical table info
physical_table_id: TableId,
physical_table_info: Option<DeserializedValueWithBytes<TableInfoValue>>,
physical_table_route: Option<PhysicalTableRouteValue>,
physical_columns: Vec<ColumnMetadata>,
table_cache_keys_to_invalidate: Vec<CacheIdent>,
}
@@ -311,7 +338,6 @@ impl AlterTablesData {
self.table_info_values.clear();
self.physical_table_id = 0;
self.physical_table_info = None;
self.physical_table_route = None;
self.physical_columns.clear();
}
}
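A minimal sketch of the caching pattern introduced above, with local stand-in types: the physical table route is kept outside the serialized procedure data, fetched lazily before the steps that need it, and dropped whenever a step fails so a retried step re-reads fresh metadata.

```rust
#[derive(Clone, Debug)]
struct PhysicalTableRoute { region_routes: Vec<u64> }

struct Procedure { physical_table_route: Option<PhysicalTableRoute> }

impl Procedure {
    // Stand-in for `table_route_manager().get_physical_table_route(..)`.
    fn fetch_route(&self) -> PhysicalTableRoute {
        PhysicalTableRoute { region_routes: vec![1, 2, 3] }
    }

    fn fetch_if_non_exist(&mut self) {
        if self.physical_table_route.is_none() {
            self.physical_table_route = Some(self.fetch_route());
        }
    }

    fn execute_step(&mut self) -> Result<(), String> {
        self.fetch_if_non_exist();
        let routes = &self.physical_table_route.as_ref().unwrap().region_routes;
        if routes.is_empty() { Err("no routes".into()) } else { Ok(()) }
    }

    fn execute(&mut self) -> Result<(), String> {
        let result = self.execute_step();
        // Mirror of the `inspect_err` above: invalidate the cache on any error.
        if result.is_err() {
            self.physical_table_route = None;
        }
        result
    }
}

fn main() {
    let mut p = Procedure { physical_table_route: None };
    assert!(p.execute().is_ok());
}
```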

View File

@@ -28,9 +28,11 @@ use crate::rpc::router::region_distribution;
impl AlterLogicalTablesProcedure {
pub(crate) async fn update_physical_table_metadata(&mut self) -> Result<()> {
self.fetch_physical_table_route_if_non_exist().await?;
// Safety: must exist.
let physical_table_info = self.data.physical_table_info.as_ref().unwrap();
let physical_table_route = self.data.physical_table_route.as_ref().unwrap();
// Safety: fetched in `fetch_physical_table_route_if_non_exist`.
let physical_table_route = self.physical_table_route.as_ref().unwrap();
let region_distribution = region_distribution(&physical_table_route.region_routes);
// Updates physical table's metadata.

View File

@@ -445,6 +445,10 @@ impl From<&CreateFlowData> for CreateRequest {
create_if_not_exists: true,
or_replace: value.task.or_replace,
expire_after: value.task.expire_after.map(|value| ExpireAfter { value }),
eval_interval: value
.task
.eval_interval_secs
.map(|seconds| api::v1::EvalInterval { seconds }),
comment: value.task.comment.clone(),
sql: value.task.sql.clone(),
flow_options: value.task.flow_options.clone(),
@@ -464,6 +468,7 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
flow_name,
sink_table_name,
expire_after,
eval_interval_secs: eval_interval,
comment,
sql,
flow_options: mut options,
@@ -503,6 +508,7 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
flow_name,
raw_sql: sql,
expire_after,
eval_interval_secs: eval_interval,
comment,
options,
created_time: create_time,

View File

@@ -45,6 +45,7 @@ pub(crate) fn test_create_flow_task(
or_replace: false,
create_if_not_exists,
expire_after: Some(300),
eval_interval_secs: None,
comment: "".to_string(),
sql: "select 1".to_string(),
flow_options: Default::default(),
@@ -189,6 +190,7 @@ fn create_test_flow_task_for_serialization() -> CreateFlowTask {
or_replace: false,
create_if_not_exists: false,
expire_after: None,
eval_interval_secs: None,
comment: "test comment".to_string(),
sql: "SELECT * FROM source_table".to_string(),
flow_options: HashMap::new(),

View File

@@ -535,9 +535,6 @@ pub enum Error {
source: common_wal::error::Error,
},
#[snafu(display("Failed to resolve Kafka broker endpoint."))]
ResolveKafkaEndpoint { source: common_wal::error::Error },
#[snafu(display("Failed to build a Kafka controller client"))]
BuildKafkaCtrlClient {
#[snafu(implicit)]
@@ -1108,7 +1105,6 @@ impl ErrorExt for Error {
| BuildKafkaClient { .. }
| BuildKafkaCtrlClient { .. }
| KafkaPartitionClient { .. }
| ResolveKafkaEndpoint { .. }
| ProduceRecord { .. }
| CreateKafkaWalTopic { .. }
| EmptyTopicPool { .. }

View File

@@ -151,7 +151,7 @@ impl Display for DowngradeRegion {
}
/// Upgrades a follower region to leader region.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct UpgradeRegion {
/// The [RegionId].
pub region_id: RegionId,
@@ -168,6 +168,24 @@ pub struct UpgradeRegion {
/// The hint for replaying memtable.
#[serde(default)]
pub location_id: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub replay_entry_id: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub metadata_replay_entry_id: Option<u64>,
}
impl UpgradeRegion {
/// Sets the replay entry id.
pub fn with_replay_entry_id(mut self, replay_entry_id: Option<u64>) -> Self {
self.replay_entry_id = replay_entry_id;
self
}
/// Sets the metadata replay entry id.
pub fn with_metadata_replay_entry_id(mut self, metadata_replay_entry_id: Option<u64>) -> Self {
self.metadata_replay_entry_id = metadata_replay_entry_id;
self
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
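A small round-trip showing why the new optional fields carry `#[serde(default, skip_serializing_if = "Option::is_none")]`, assuming `serde` (derive feature) and `serde_json`; the struct is a trimmed stand-in for `UpgradeRegion`.

```rust
use serde::{Deserialize, Serialize};

#[derive(Debug, Default, Serialize, Deserialize, PartialEq)]
struct UpgradeRegionHints {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    replay_entry_id: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    metadata_replay_entry_id: Option<u64>,
}

fn main() {
    // Unset options disappear from the serialized form entirely.
    let json = serde_json::to_string(&UpgradeRegionHints::default()).unwrap();
    assert_eq!(json, "{}");

    // A payload that predates the fields still parses, with both hints left as None.
    let old: UpgradeRegionHints = serde_json::from_str("{}").unwrap();
    assert_eq!(old, UpgradeRegionHints::default());
}
```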

View File

@@ -155,6 +155,7 @@ use crate::error::{self, Result, SerdeJsonSnafu};
use crate::key::flow::flow_state::FlowStateValue;
use crate::key::node_address::NodeAddressValue;
use crate::key::table_route::TableRouteKey;
use crate::key::topic_region::TopicRegionValue;
use crate::key::txn_helper::TxnOpGetResponseSet;
use crate::kv_backend::txn::{Txn, TxnOp};
use crate::kv_backend::KvBackendRef;
@@ -164,6 +165,7 @@ use crate::state_store::PoisonValue;
use crate::DatanodeId;
pub const NAME_PATTERN: &str = r"[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*";
pub const TOPIC_NAME_PATTERN: &str = r"[a-zA-Z0-9_:-][a-zA-Z0-9_:\-\.@#]*";
pub const LEGACY_MAINTENANCE_KEY: &str = "__maintenance";
pub const MAINTENANCE_KEY: &str = "__switches/maintenance";
pub const PAUSE_PROCEDURE_KEY: &str = "__switches/pause_procedure";
@@ -271,6 +273,10 @@ lazy_static! {
pub static ref NAME_PATTERN_REGEX: Regex = Regex::new(NAME_PATTERN).unwrap();
}
lazy_static! {
pub static ref TOPIC_NAME_PATTERN_REGEX: Regex = Regex::new(TOPIC_NAME_PATTERN).unwrap();
}
lazy_static! {
static ref TABLE_INFO_KEY_PATTERN: Regex =
Regex::new(&format!("^{TABLE_INFO_KEY_PREFIX}/([0-9]+)$")).unwrap();
@@ -326,7 +332,7 @@ lazy_static! {
lazy_static! {
pub static ref TOPIC_REGION_PATTERN: Regex = Regex::new(&format!(
"^{TOPIC_REGION_PREFIX}/({NAME_PATTERN})/([0-9]+)$"
"^{TOPIC_REGION_PREFIX}/({TOPIC_NAME_PATTERN})/([0-9]+)$"
))
.unwrap();
}
@@ -622,7 +628,6 @@ impl TableMetadataManager {
&self.topic_region_manager
}
#[cfg(feature = "testing")]
pub fn kv_backend(&self) -> &KvBackendRef {
&self.kv_backend
}
@@ -1434,7 +1439,8 @@ impl_metadata_value! {
NodeAddressValue,
SchemaNameValue,
FlowStateValue,
PoisonValue
PoisonValue,
TopicRegionValue
}
impl_optional_metadata_value! {
@@ -1676,9 +1682,11 @@ mod tests {
.topic_region_manager
.regions(&topic)
.await
.unwrap();
.unwrap()
.into_keys()
.collect::<Vec<_>>();
assert_eq!(regions.len(), 8);
assert_eq!(regions[0], region_id);
assert!(regions.contains(&region_id));
}
}
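A quick check of why topics get their own pattern: Kafka topic names (for example the UUID-prefixed one in the test above) may start with a digit, which the first character class of `NAME_PATTERN` rejects. The patterns below are copied from the diff and anchored here for a whole-string match, as the composite `TOPIC_REGION_PATTERN` does.

```rust
use regex::Regex;

fn main() {
    let name_pattern = Regex::new(r"^[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*$").unwrap();
    let topic_pattern = Regex::new(r"^[a-zA-Z0-9_:-][a-zA-Z0-9_:\-\.@#]*$").unwrap();

    let topic = "6f153a64-7fac-4cf6-8b0b-a7967dd73879_2";
    // The leading digit fails the table-name pattern but passes the topic pattern.
    assert!(!name_pattern.is_match(topic));
    assert!(topic_pattern.is_match(topic));
}
```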

View File

@@ -464,6 +464,7 @@ mod tests {
flownode_ids,
raw_sql: "raw".to_string(),
expire_after: Some(300),
eval_interval_secs: None,
comment: "hi".to_string(),
options: Default::default(),
created_time: chrono::Utc::now(),
@@ -638,6 +639,7 @@ mod tests {
flownode_ids: [(0, 1u64)].into(),
raw_sql: "raw".to_string(),
expire_after: Some(300),
eval_interval_secs: None,
comment: "hi".to_string(),
options: Default::default(),
created_time: chrono::Utc::now(),
@@ -1013,6 +1015,7 @@ mod tests {
flownode_ids: [(0, 1u64)].into(),
raw_sql: "raw".to_string(),
expire_after: Some(300),
eval_interval_secs: None,
comment: "hi".to_string(),
options: Default::default(),
created_time: chrono::Utc::now(),

View File

@@ -135,6 +135,12 @@ pub struct FlowInfoValue {
/// The expr of expire.
/// Duration in seconds as `i64`.
pub expire_after: Option<i64>,
/// The eval interval.
/// Duration in seconds as `i64`.
/// If `None`, will automatically decide when to evaluate the flow.
/// If `Some`, it will be evaluated every `eval_interval` seconds.
#[serde(default)]
pub eval_interval_secs: Option<i64>,
/// The comment.
pub comment: String,
/// The options.
@@ -191,6 +197,10 @@ impl FlowInfoValue {
self.expire_after
}
pub fn eval_interval(&self) -> Option<i64> {
self.eval_interval_secs
}
pub fn comment(&self) -> &String {
&self.comment
}

View File

@@ -12,20 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::fmt::{self, Display};
@@ -37,10 +23,12 @@ use table::metadata::TableId;
use crate::ddl::utils::parse_region_wal_options;
use crate::error::{Error, InvalidMetadataSnafu, Result};
use crate::key::{MetadataKey, TOPIC_REGION_PATTERN, TOPIC_REGION_PREFIX};
use crate::key::{MetadataKey, MetadataValue, TOPIC_REGION_PATTERN, TOPIC_REGION_PREFIX};
use crate::kv_backend::txn::{Txn, TxnOp};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::{BatchDeleteRequest, BatchPutRequest, PutRequest, RangeRequest};
use crate::rpc::store::{
BatchDeleteRequest, BatchGetRequest, BatchPutRequest, PutRequest, RangeRequest,
};
use crate::rpc::KeyValue;
// The TopicRegionKey is a key for the topic-region mapping in the kvbackend.
@@ -51,8 +39,20 @@ pub struct TopicRegionKey<'a> {
pub topic: &'a str,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct TopicRegionValue;
/// Represents additional information for a region when using a shared WAL.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
pub struct TopicRegionValue {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub checkpoint: Option<ReplayCheckpoint>,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct ReplayCheckpoint {
#[serde(default)]
pub entry_id: u64,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub metadata_entry_id: Option<u64>,
}
impl<'a> TopicRegionKey<'a> {
pub fn new(region_id: RegionId, topic: &'a str) -> Self {
@@ -118,9 +118,47 @@ impl<'a> TryFrom<&'a str> for TopicRegionKey<'a> {
}
}
fn topic_region_decoder(value: &KeyValue) -> Result<TopicRegionKey<'_>> {
impl ReplayCheckpoint {
/// Creates a new [`ReplayCheckpoint`] with the given entry id and metadata entry id.
pub fn new(entry_id: u64, metadata_entry_id: Option<u64>) -> Self {
Self {
entry_id,
metadata_entry_id,
}
}
}
impl TopicRegionValue {
/// Creates a new [`TopicRegionValue`] with the given checkpoint.
pub fn new(checkpoint: Option<ReplayCheckpoint>) -> Self {
Self { checkpoint }
}
/// Returns the minimum entry id of the region.
///
/// If the metadata entry id is not set, it returns the entry id.
pub fn min_entry_id(&self) -> Option<u64> {
match self.checkpoint {
Some(ReplayCheckpoint {
entry_id,
metadata_entry_id,
}) => match metadata_entry_id {
Some(metadata_entry_id) => Some(entry_id.min(metadata_entry_id)),
None => Some(entry_id),
},
None => None,
}
}
}
fn topic_region_decoder(value: &KeyValue) -> Result<(TopicRegionKey<'_>, TopicRegionValue)> {
let key = TopicRegionKey::from_bytes(&value.key)?;
Ok(key)
let value = if value.value.is_empty() {
TopicRegionValue::default()
} else {
TopicRegionValue::try_from_raw_value(&value.value)?
};
Ok((key, value))
}
/// Manages map of topics and regions in kvbackend.
@@ -143,21 +181,59 @@ impl TopicRegionManager {
Ok(())
}
pub async fn batch_put(&self, keys: Vec<TopicRegionKey<'_>>) -> Result<()> {
pub async fn batch_get(
&self,
keys: Vec<TopicRegionKey<'_>>,
) -> Result<HashMap<RegionId, TopicRegionValue>> {
let raw_keys = keys.iter().map(|key| key.to_bytes()).collect::<Vec<_>>();
let req = BatchGetRequest { keys: raw_keys };
let resp = self.kv_backend.batch_get(req).await?;
let v = resp
.kvs
.into_iter()
.map(|kv| topic_region_decoder(&kv).map(|(key, value)| (key.region_id, value)))
.collect::<Result<HashMap<_, _>>>()?;
Ok(v)
}
pub async fn get(&self, key: TopicRegionKey<'_>) -> Result<Option<TopicRegionValue>> {
let key_bytes = key.to_bytes();
let resp = self.kv_backend.get(&key_bytes).await?;
let value = resp
.map(|kv| topic_region_decoder(&kv).map(|(_, value)| value))
.transpose()?;
Ok(value)
}
pub async fn batch_put(
&self,
keys: &[(TopicRegionKey<'_>, Option<TopicRegionValue>)],
) -> Result<()> {
let req = BatchPutRequest {
kvs: keys
.into_iter()
.map(|key| KeyValue {
key: key.to_bytes(),
value: vec![],
.iter()
.map(|(key, value)| {
let value = value
.map(|v| v.try_as_raw_value())
.transpose()?
.unwrap_or_default();
Ok(KeyValue {
key: key.to_bytes(),
value,
})
})
.collect(),
.collect::<Result<Vec<_>>>()?,
prev_kv: false,
};
self.kv_backend.batch_put(req).await?;
Ok(())
}
/// Builds a transaction that creates the topic-region mapping. It executes only when the primary key comparisons succeed.
pub fn build_create_txn(
&self,
table_id: TableId,
@@ -176,8 +252,8 @@ impl TopicRegionManager {
Ok(Txn::new().and_then(operations))
}
/// Returns the list of region ids using specified topic.
pub async fn regions(&self, topic: &str) -> Result<Vec<RegionId>> {
/// Returns the map of [`RegionId`] to their corresponding topic [`TopicRegionValue`].
pub async fn regions(&self, topic: &str) -> Result<HashMap<RegionId, TopicRegionValue>> {
let prefix = TopicRegionKey::range_topic_key(topic);
let req = RangeRequest::new().with_prefix(prefix.as_bytes());
let resp = self.kv_backend.range(req).await?;
@@ -186,7 +262,10 @@ impl TopicRegionManager {
.iter()
.map(topic_region_decoder)
.collect::<Result<Vec<_>>>()?;
Ok(region_ids.iter().map(|key| key.region_id).collect())
Ok(region_ids
.into_iter()
.map(|(key, value)| (key.region_id, value))
.collect())
}
pub async fn delete(&self, key: TopicRegionKey<'_>) -> Result<()> {
@@ -248,15 +327,24 @@ mod tests {
let topics = (0..16).map(|i| format!("topic_{}", i)).collect::<Vec<_>>();
let keys = (0..64)
.map(|i| TopicRegionKey::new(RegionId::from_u64(i), &topics[(i % 16) as usize]))
.map(|i| {
(
TopicRegionKey::new(RegionId::from_u64(i), &topics[(i % 16) as usize]),
None,
)
})
.collect::<Vec<_>>();
manager.batch_put(keys.clone()).await.unwrap();
let mut key_values = manager.regions(&topics[0]).await.unwrap();
manager.batch_put(&keys).await.unwrap();
let mut key_values = manager
.regions(&topics[0])
.await
.unwrap()
.into_keys()
.collect::<Vec<_>>();
let expected = keys
.iter()
.filter_map(|key| {
.filter_map(|(key, _)| {
if key.topic == topics[0] {
Some(key.region_id)
} else {
@@ -269,10 +357,15 @@ mod tests {
let key = TopicRegionKey::new(RegionId::from_u64(0), "topic_0");
manager.delete(key.clone()).await.unwrap();
let mut key_values = manager.regions(&topics[0]).await.unwrap();
let mut key_values = manager
.regions(&topics[0])
.await
.unwrap()
.into_keys()
.collect::<Vec<_>>();
let expected = keys
.iter()
.filter_map(|key| {
.filter_map(|(key, _)| {
if key.topic == topics[0] && key.region_id != RegionId::from_u64(0) {
Some(key.region_id)
} else {
@@ -324,4 +417,18 @@ mod tests {
expected.sort_by_key(|(region_id, _)| region_id.as_u64());
assert_eq!(topic_region_map, expected);
}
#[test]
fn test_topic_region_key_is_match() {
let key = "__topic_region/6f153a64-7fac-4cf6-8b0b-a7967dd73879_2/4410931412992";
let topic_region_key = TopicRegionKey::try_from(key).unwrap();
assert_eq!(
topic_region_key.topic,
"6f153a64-7fac-4cf6-8b0b-a7967dd73879_2"
);
assert_eq!(
topic_region_key.region_id,
RegionId::from_u64(4410931412992)
);
}
}
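A standalone mirror of the `min_entry_id` rule above: when a region also tracks a metadata checkpoint, replay has to start from the smaller of the two entry ids, and a missing checkpoint yields `None` (no recorded replay position).

```rust
struct ReplayCheckpoint { entry_id: u64, metadata_entry_id: Option<u64> }

fn min_entry_id(checkpoint: Option<ReplayCheckpoint>) -> Option<u64> {
    checkpoint.map(|c| match c.metadata_entry_id {
        Some(meta) => c.entry_id.min(meta),
        None => c.entry_id,
    })
}

fn main() {
    assert_eq!(min_entry_id(None), None);
    assert_eq!(
        min_entry_id(Some(ReplayCheckpoint { entry_id: 30, metadata_entry_id: None })),
        Some(30)
    );
    // The lagging checkpoint wins, so replay does not skip entries it still needs.
    assert_eq!(
        min_entry_id(Some(ReplayCheckpoint { entry_id: 30, metadata_entry_id: Some(12) })),
        Some(12)
    );
}
```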

View File

@@ -32,6 +32,8 @@ use crate::rpc::store::{
};
use crate::rpc::KeyValue;
const DEFAULT_MAX_DECODING_SIZE: usize = 32 * 1024 * 1024; // 32MB
pub struct EtcdStore {
client: Client,
// Maximum number of operations permitted in a transaction.
@@ -39,6 +41,8 @@ pub struct EtcdStore {
//
// For more detail, see: https://etcd.io/docs/v3.5/op-guide/configuration/
max_txn_ops: usize,
// Maximum decoding message size in bytes. Default 32MB.
max_decoding_size: usize,
}
impl EtcdStore {
@@ -59,9 +63,20 @@ impl EtcdStore {
Arc::new(Self {
client,
max_txn_ops,
max_decoding_size: DEFAULT_MAX_DECODING_SIZE,
})
}
pub fn set_max_decoding_size(&mut self, max_decoding_size: usize) {
self.max_decoding_size = max_decoding_size;
}
fn kv_client(&self) -> etcd_client::KvClient {
self.client
.kv_client()
.max_decoding_message_size(self.max_decoding_size)
}
async fn do_multi_txn(&self, txn_ops: Vec<TxnOp>) -> Result<Vec<TxnResponse>> {
let max_txn_ops = self.max_txn_ops();
if txn_ops.len() < max_txn_ops {
@@ -71,7 +86,6 @@ impl EtcdStore {
.start_timer();
let txn = Txn::new().and_then(txn_ops);
let txn_res = self
.client
.kv_client()
.txn(txn)
.await
@@ -86,7 +100,7 @@ impl EtcdStore {
.with_label_values(&["etcd", "txn"])
.start_timer();
let txn = Txn::new().and_then(part);
self.client.kv_client().txn(txn).await
self.kv_client().txn(txn).await
})
.collect::<Vec<_>>();
@@ -110,7 +124,6 @@ impl KvBackend for EtcdStore {
let Get { key, options } = req.try_into()?;
let mut res = self
.client
.kv_client()
.get(key, options)
.await
@@ -136,7 +149,6 @@ impl KvBackend for EtcdStore {
} = req.try_into()?;
let mut res = self
.client
.kv_client()
.put(key, value, options)
.await
@@ -201,7 +213,6 @@ impl KvBackend for EtcdStore {
let Delete { key, options } = req.try_into()?;
let mut res = self
.client
.kv_client()
.delete(key, options)
.await
@@ -265,7 +276,6 @@ impl TxnService for EtcdStore {
let etcd_txn: Txn = txn.into();
let txn_res = self
.client
.kv_client()
.txn(etcd_txn)
.await
@@ -564,6 +574,7 @@ mod tests {
Some(EtcdStore {
client,
max_txn_ops: 128,
max_decoding_size: DEFAULT_MAX_DECODING_SIZE,
})
}
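A minimal sketch of the refactor above, with a local stand-in for `etcd_client::KvClient`: routing every raw client access through one `kv_client()` helper means no individual get/put/txn path can forget to raise the decoding limit above the transport's default (4 MB in tonic).

```rust
const DEFAULT_MAX_DECODING_SIZE: usize = 32 * 1024 * 1024; // 32MB

// Stand-in for `etcd_client::KvClient`; only the builder-style setter is mirrored.
struct FakeKvClient { max_decoding_size: usize }

impl FakeKvClient {
    fn max_decoding_message_size(mut self, limit: usize) -> Self {
        self.max_decoding_size = limit;
        self
    }
}

struct EtcdStore { max_decoding_size: usize }

impl EtcdStore {
    // Call sites use `self.kv_client()` instead of `self.client.kv_client()`, so the
    // configured limit is applied uniformly.
    fn kv_client(&self) -> FakeKvClient {
        FakeKvClient { max_decoding_size: 0 }.max_decoding_message_size(self.max_decoding_size)
    }
}

fn main() {
    let store = EtcdStore { max_decoding_size: DEFAULT_MAX_DECODING_SIZE };
    assert_eq!(store.kv_client().max_decoding_size, DEFAULT_MAX_DECODING_SIZE);
}
```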

View File

@@ -192,50 +192,61 @@ fn pg_generate_in_placeholders(from: usize, to: usize) -> Vec<String> {
/// Factory for building sql templates.
struct PgSqlTemplateFactory<'a> {
schema_name: Option<&'a str>,
table_name: &'a str,
}
impl<'a> PgSqlTemplateFactory<'a> {
/// Creates a new [`SqlTemplateFactory`] with the given table name.
fn new(table_name: &'a str) -> Self {
Self { table_name }
/// Creates a new factory with optional schema.
fn new(schema_name: Option<&'a str>, table_name: &'a str) -> Self {
Self {
schema_name,
table_name,
}
}
/// Builds the template set for the given table name.
fn build(&self) -> PgSqlTemplateSet {
let table_name = self.table_name;
let table_ident = Self::format_table_ident(self.schema_name, self.table_name);
// Some of queries don't end with `;`, because we need to add `LIMIT` clause.
PgSqlTemplateSet {
table_name: table_name.to_string(),
table_ident: table_ident.clone(),
// Do not attempt to create schema implicitly to avoid extra privileges requirement.
create_table_statement: format!(
"CREATE TABLE IF NOT EXISTS \"{table_name}\"(k bytea PRIMARY KEY, v bytea)",
"CREATE TABLE IF NOT EXISTS {table_ident}(k bytea PRIMARY KEY, v bytea)",
),
range_template: RangeTemplate {
point: format!("SELECT k, v FROM \"{table_name}\" WHERE k = $1"),
point: format!("SELECT k, v FROM {table_ident} WHERE k = $1"),
range: format!(
"SELECT k, v FROM \"{table_name}\" WHERE k >= $1 AND k < $2 ORDER BY k"
"SELECT k, v FROM {table_ident} WHERE k >= $1 AND k < $2 ORDER BY k"
),
full: format!("SELECT k, v FROM \"{table_name}\" ORDER BY k"),
left_bounded: format!("SELECT k, v FROM \"{table_name}\" WHERE k >= $1 ORDER BY k"),
prefix: format!("SELECT k, v FROM \"{table_name}\" WHERE k LIKE $1 ORDER BY k"),
full: format!("SELECT k, v FROM {table_ident} ORDER BY k"),
left_bounded: format!("SELECT k, v FROM {table_ident} WHERE k >= $1 ORDER BY k"),
prefix: format!("SELECT k, v FROM {table_ident} WHERE k LIKE $1 ORDER BY k"),
},
delete_template: RangeTemplate {
point: format!("DELETE FROM \"{table_name}\" WHERE k = $1 RETURNING k,v;"),
range: format!(
"DELETE FROM \"{table_name}\" WHERE k >= $1 AND k < $2 RETURNING k,v;"
),
full: format!("DELETE FROM \"{table_name}\" RETURNING k,v"),
left_bounded: format!("DELETE FROM \"{table_name}\" WHERE k >= $1 RETURNING k,v;"),
prefix: format!("DELETE FROM \"{table_name}\" WHERE k LIKE $1 RETURNING k,v;"),
point: format!("DELETE FROM {table_ident} WHERE k = $1 RETURNING k,v;"),
range: format!("DELETE FROM {table_ident} WHERE k >= $1 AND k < $2 RETURNING k,v;"),
full: format!("DELETE FROM {table_ident} RETURNING k,v"),
left_bounded: format!("DELETE FROM {table_ident} WHERE k >= $1 RETURNING k,v;"),
prefix: format!("DELETE FROM {table_ident} WHERE k LIKE $1 RETURNING k,v;"),
},
}
}
/// Formats the table reference with schema if provided.
fn format_table_ident(schema_name: Option<&str>, table_name: &str) -> String {
match schema_name {
Some(s) if !s.is_empty() => format!("\"{}\".\"{}\"", s, table_name),
_ => format!("\"{}\"", table_name),
}
}
}
/// Templates for the given table name.
#[derive(Debug, Clone)]
pub struct PgSqlTemplateSet {
table_name: String,
table_ident: String,
create_table_statement: String,
range_template: RangeTemplate,
delete_template: RangeTemplate,
@@ -244,27 +255,24 @@ pub struct PgSqlTemplateSet {
impl PgSqlTemplateSet {
/// Generates the sql for batch get.
fn generate_batch_get_query(&self, key_len: usize) -> String {
let table_name = &self.table_name;
let in_clause = pg_generate_in_placeholders(1, key_len).join(", ");
format!(
"SELECT k, v FROM \"{table_name}\" WHERE k in ({});",
in_clause
"SELECT k, v FROM {} WHERE k in ({});",
self.table_ident, in_clause
)
}
/// Generates the sql for batch delete.
fn generate_batch_delete_query(&self, key_len: usize) -> String {
let table_name = &self.table_name;
let in_clause = pg_generate_in_placeholders(1, key_len).join(", ");
format!(
"DELETE FROM \"{table_name}\" WHERE k in ({}) RETURNING k,v;",
in_clause
"DELETE FROM {} WHERE k in ({}) RETURNING k,v;",
self.table_ident, in_clause
)
}
/// Generates the sql for batch upsert.
fn generate_batch_upsert_query(&self, kv_len: usize) -> String {
let table_name = &self.table_name;
let in_placeholders: Vec<String> = (1..=kv_len).map(|i| format!("${}", i)).collect();
let in_clause = in_placeholders.join(", ");
let mut param_index = kv_len + 1;
@@ -278,9 +286,9 @@ impl PgSqlTemplateSet {
format!(
r#"
WITH prev AS (
SELECT k,v FROM "{table_name}" WHERE k IN ({in_clause})
SELECT k,v FROM {table} WHERE k IN ({in_clause})
), update AS (
INSERT INTO "{table_name}" (k, v) VALUES
INSERT INTO {table} (k, v) VALUES
{values_clause}
ON CONFLICT (
k
@@ -289,7 +297,10 @@ impl PgSqlTemplateSet {
)
SELECT k, v FROM prev;
"#
"#,
table = self.table_ident,
in_clause = in_clause,
values_clause = values_clause
)
}
}
@@ -835,7 +846,7 @@ impl PgStore {
.context(CreatePostgresPoolSnafu)?,
};
Self::with_pg_pool(pool, table_name, max_txn_ops).await
Self::with_pg_pool(pool, None, table_name, max_txn_ops).await
}
/// Create [PgStore] impl of [KvBackendRef] from url (backward compatibility).
@@ -843,15 +854,14 @@ impl PgStore {
Self::with_url_and_tls(url, table_name, max_txn_ops, None).await
}
/// Create [PgStore] impl of [KvBackendRef] from [deadpool_postgres::Pool].
/// Create [PgStore] impl of [KvBackendRef] from [deadpool_postgres::Pool] with optional schema.
pub async fn with_pg_pool(
pool: Pool,
schema_name: Option<&str>,
table_name: &str,
max_txn_ops: usize,
) -> Result<KvBackendRef> {
// This step ensures the postgres metadata backend is ready to use.
// We check if greptime_metakv table exists, and we will create a new table
// if it does not exist.
// Ensure the postgres metadata backend is ready to use.
let client = match pool.get().await {
Ok(client) => client,
Err(e) => {
@@ -861,8 +871,9 @@ impl PgStore {
.fail();
}
};
let template_factory = PgSqlTemplateFactory::new(table_name);
let template_factory = PgSqlTemplateFactory::new(schema_name, table_name);
let sql_template_set = template_factory.build();
// Do not attempt to create schema implicitly.
client
.execute(&sql_template_set.create_table_statement, &[])
.await
@@ -890,7 +901,7 @@ mod tests {
test_txn_compare_less, test_txn_compare_not_equal, test_txn_one_compare_op,
text_txn_multi_compare_op, unprepare_kv,
};
use crate::maybe_skip_postgres_integration_test;
use crate::{maybe_skip_postgres15_integration_test, maybe_skip_postgres_integration_test};
async fn build_pg_kv_backend(table_name: &str) -> Option<PgStore> {
let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap_or_default();
@@ -905,8 +916,10 @@ mod tests {
.context(CreatePostgresPoolSnafu)
.unwrap();
let client = pool.get().await.unwrap();
let template_factory = PgSqlTemplateFactory::new(table_name);
// use the default schema (i.e., public)
let template_factory = PgSqlTemplateFactory::new(None, table_name);
let sql_templates = template_factory.build();
// Do not attempt to create schema implicitly.
client
.execute(&sql_templates.create_table_statement, &[])
.await
@@ -923,6 +936,61 @@ mod tests {
})
}
async fn build_pg15_pool() -> Option<Pool> {
let url = std::env::var("GT_POSTGRES15_ENDPOINTS").unwrap_or_default();
if url.is_empty() {
return None;
}
let mut cfg = Config::new();
cfg.url = Some(url);
let pool = cfg
.create_pool(Some(Runtime::Tokio1), NoTls)
.context(CreatePostgresPoolSnafu)
.ok()?;
Some(pool)
}
#[tokio::test]
async fn test_pg15_create_table_in_public_should_fail() {
maybe_skip_postgres15_integration_test!();
let Some(pool) = build_pg15_pool().await else {
return;
};
let res = PgStore::with_pg_pool(pool, None, "pg15_public_should_fail", 128).await;
assert!(
res.is_err(),
"creating table in public should fail for test_user"
);
}
#[tokio::test]
async fn test_pg15_create_table_in_test_schema_and_crud_should_succeed() {
maybe_skip_postgres15_integration_test!();
let Some(pool) = build_pg15_pool().await else {
return;
};
let schema_name = std::env::var("GT_POSTGRES15_SCHEMA").unwrap();
let client = pool.get().await.unwrap();
let factory = PgSqlTemplateFactory::new(Some(&schema_name), "pg15_ok");
let templates = factory.build();
client
.execute(&templates.create_table_statement, &[])
.await
.unwrap();
let kv = PgStore {
max_txn_ops: 128,
sql_template_set: templates,
txn_retry_count: RDS_STORE_TXN_RETRY_COUNT,
executor_factory: PgExecutorFactory { pool },
_phantom: PhantomData,
};
let prefix = b"pg15_crud/";
prepare_kv_with_prefix(&kv, prefix.to_vec()).await;
test_kv_put_with_prefix(&kv, prefix.to_vec()).await;
test_kv_batch_get_with_prefix(&kv, prefix.to_vec()).await;
unprepare_kv(&kv, prefix).await;
}
#[tokio::test]
async fn test_pg_put() {
maybe_skip_postgres_integration_test!();
@@ -1024,4 +1092,31 @@ mod tests {
test_txn_compare_less(&kv_backend).await;
test_txn_compare_not_equal(&kv_backend).await;
}
#[test]
fn test_pg_template_with_schema() {
let factory = PgSqlTemplateFactory::new(Some("test_schema"), "greptime_metakv");
let t = factory.build();
assert!(t
.create_table_statement
.contains("\"test_schema\".\"greptime_metakv\""));
let upsert = t.generate_batch_upsert_query(1);
assert!(upsert.contains("\"test_schema\".\"greptime_metakv\""));
let get = t.generate_batch_get_query(1);
assert!(get.contains("\"test_schema\".\"greptime_metakv\""));
let del = t.generate_batch_delete_query(1);
assert!(del.contains("\"test_schema\".\"greptime_metakv\""));
}
#[test]
fn test_format_table_ident() {
let t = PgSqlTemplateFactory::format_table_ident(None, "test_table");
assert_eq!(t, "\"test_table\"");
let t = PgSqlTemplateFactory::format_table_ident(Some("test_schema"), "test_table");
assert_eq!(t, "\"test_schema\".\"test_table\"");
let t = PgSqlTemplateFactory::format_table_ident(Some(""), "test_table");
assert_eq!(t, "\"test_table\"");
}
}
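A standalone copy of the quoting rule exercised by `test_format_table_ident` above: both parts are double-quoted (so mixed-case or reserved-word schema names stay intact), and an empty schema falls back to the bare table name, leaving resolution to the current `search_path`.

```rust
fn format_table_ident(schema_name: Option<&str>, table_name: &str) -> String {
    match schema_name {
        Some(s) if !s.is_empty() => format!("\"{}\".\"{}\"", s, table_name),
        _ => format!("\"{}\"", table_name),
    }
}

fn main() {
    assert_eq!(format_table_ident(None, "greptime_metakv"), "\"greptime_metakv\"");
    assert_eq!(
        format_table_ident(Some("meta"), "greptime_metakv"),
        "\"meta\".\"greptime_metakv\""
    );
}
```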

View File

@@ -24,6 +24,7 @@ use datatypes::schema::ColumnSchema;
use futures::future::{join_all, try_join_all};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::metadata::{ColumnMetadata, RegionMetadata};
use store_api::storage::consts::ReservedColumnId;
use store_api::storage::{RegionId, TableId};
use table::metadata::{RawTableInfo, RawTableMeta};
use table::table_name::TableName;
@@ -384,6 +385,7 @@ pub(crate) fn build_table_meta_from_column_metadatas(
*next_column_id = column_ids
.iter()
.filter(|id| !ReservedColumnId::is_reserved(**id))
.max()
.map(|max| max + 1)
.unwrap_or(*next_column_id)
@@ -1039,9 +1041,13 @@ mod tests {
fn test_build_table_info_from_column_metadatas() {
let mut column_metadatas = new_test_column_metadatas();
column_metadatas.push(ColumnMetadata {
column_schema: ColumnSchema::new("col3", ConcreteDataType::string_datatype(), true),
column_schema: ColumnSchema::new(
"__table_id",
ConcreteDataType::string_datatype(),
true,
),
semantic_type: SemanticType::Tag,
column_id: 3,
column_id: ReservedColumnId::table_id(),
});
let table_id = 1;
@@ -1066,8 +1072,11 @@ mod tests {
assert_eq!(new_table_meta.partition_key_indices, vec![2]);
assert_eq!(new_table_meta.value_indices, vec![1, 2]);
assert_eq!(new_table_meta.schema.timestamp_index, Some(1));
assert_eq!(new_table_meta.column_ids, vec![0, 1, 2, 3]);
assert_eq!(new_table_meta.next_column_id, 4);
assert_eq!(
new_table_meta.column_ids,
vec![0, 1, 2, ReservedColumnId::table_id()]
);
assert_eq!(new_table_meta.next_column_id, table_meta.next_column_id);
}
#[test]
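A standalone sketch of the fix above: reserved column ids (internal columns such as the `__table_id` column used in the test) must not feed into `next_column_id`, or the next user-defined column would be allocated inside the reserved range. `RESERVED_BIT` is an illustrative stand-in for however `ReservedColumnId` encodes its ids.

```rust
const RESERVED_BIT: u32 = 1 << 31;

fn is_reserved(id: u32) -> bool {
    id & RESERVED_BIT != 0
}

fn next_column_id(column_ids: &[u32], current_next: u32) -> u32 {
    column_ids
        .iter()
        .filter(|id| !is_reserved(**id))
        .max()
        .map(|max| max + 1)
        .unwrap_or(current_next)
}

fn main() {
    // A physical table carries user columns 0..=2 plus one reserved internal column.
    let ids = [0, 1, 2, RESERVED_BIT | 1];
    // Without the filter the result would land in the reserved range; with it we stay at 3.
    assert_eq!(next_column_id(&ids, 3), 3);
}
```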

View File

@@ -133,6 +133,34 @@ impl LeaderRegionManifestInfo {
}
}
/// Returns the replay entry id of the data region.
pub fn replay_entry_id(&self) -> u64 {
match self {
LeaderRegionManifestInfo::Mito {
flushed_entry_id,
topic_latest_entry_id,
..
} => (*flushed_entry_id).max(*topic_latest_entry_id),
LeaderRegionManifestInfo::Metric {
data_flushed_entry_id,
data_topic_latest_entry_id,
..
} => (*data_flushed_entry_id).max(*data_topic_latest_entry_id),
}
}
/// Returns the replay entry id of the metadata region.
pub fn metadata_replay_entry_id(&self) -> Option<u64> {
match self {
LeaderRegionManifestInfo::Metric {
metadata_flushed_entry_id,
metadata_topic_latest_entry_id,
..
} => Some((*metadata_flushed_entry_id).max(*metadata_topic_latest_entry_id)),
_ => None,
}
}
/// A region is considered inactive if the flushed entry id is less than the topic's latest entry id.
///
/// The `topic_latest_entry_id` of a region is updated only when its memtable is empty during a flush.
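A sketch of the selection above: replay can start after whichever is newer, the last flushed entry id or the topic's latest entry id recorded while the memtable was empty, the idea being that entries at or before that point no longer need to be replayed for this region.

```rust
fn replay_entry_id(flushed_entry_id: u64, topic_latest_entry_id: u64) -> u64 {
    flushed_entry_id.max(topic_latest_entry_id)
}

fn main() {
    // Idle region: nothing was written since the last flush, so the topic high
    // watermark recorded at flush time wins and replay skips unrelated entries.
    assert_eq!(replay_entry_id(100, 250), 250);
    // Active region: the flushed entry id is the newer of the two.
    assert_eq!(replay_entry_id(300, 250), 300);
}
```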

View File

@@ -34,8 +34,8 @@ use api::v1::meta::{
};
use api::v1::{
AlterDatabaseExpr, AlterTableExpr, CreateDatabaseExpr, CreateFlowExpr, CreateTableExpr,
CreateViewExpr, DropDatabaseExpr, DropFlowExpr, DropTableExpr, DropViewExpr, ExpireAfter,
Option as PbOption, QueryContext as PbQueryContext, TruncateTableExpr,
CreateViewExpr, DropDatabaseExpr, DropFlowExpr, DropTableExpr, DropViewExpr, EvalInterval,
ExpireAfter, Option as PbOption, QueryContext as PbQueryContext, TruncateTableExpr,
};
use base64::engine::general_purpose;
use base64::Engine as _;
@@ -1125,6 +1125,7 @@ pub struct CreateFlowTask {
pub create_if_not_exists: bool,
/// Duration in seconds. Data older than this duration will not be used.
pub expire_after: Option<i64>,
pub eval_interval_secs: Option<i64>,
pub comment: String,
pub sql: String,
pub flow_options: HashMap<String, String>,
@@ -1142,6 +1143,7 @@ impl TryFrom<PbCreateFlowTask> for CreateFlowTask {
or_replace,
create_if_not_exists,
expire_after,
eval_interval,
comment,
sql,
flow_options,
@@ -1161,6 +1163,7 @@ impl TryFrom<PbCreateFlowTask> for CreateFlowTask {
or_replace,
create_if_not_exists,
expire_after: expire_after.map(|e| e.value),
eval_interval_secs: eval_interval.map(|e| e.seconds),
comment,
sql,
flow_options,
@@ -1178,6 +1181,7 @@ impl From<CreateFlowTask> for PbCreateFlowTask {
or_replace,
create_if_not_exists,
expire_after,
eval_interval_secs: eval_interval,
comment,
sql,
flow_options,
@@ -1192,6 +1196,7 @@ impl From<CreateFlowTask> for PbCreateFlowTask {
or_replace,
create_if_not_exists,
expire_after: expire_after.map(|value| ExpireAfter { value }),
eval_interval: eval_interval.map(|seconds| EvalInterval { seconds }),
comment,
sql,
flow_options,
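The new `eval_interval` field follows the same optional wrapper pattern as `expire_after`. A self-contained sketch of the round trip, with the generated `api::v1::EvalInterval` message stubbed as a plain struct:

// Stand-in for the generated protobuf message api::v1::EvalInterval.
#[derive(Clone, Copy, Debug, PartialEq)]
struct EvalInterval {
    seconds: i64,
}

fn to_pb(eval_interval_secs: Option<i64>) -> Option<EvalInterval> {
    eval_interval_secs.map(|seconds| EvalInterval { seconds })
}

fn from_pb(eval_interval: Option<EvalInterval>) -> Option<i64> {
    eval_interval.map(|e| e.seconds)
}

fn main() {
    assert_eq!(from_pb(to_pb(Some(60))), Some(60));
    assert_eq!(from_pb(to_pb(None)), None);
}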

View File

@@ -260,7 +260,7 @@ pub async fn test_kafka_topic_pool(
/// Skip the test if the environment variable `GT_POSTGRES_ENDPOINTS` is not set.
///
/// The format of the environment variable is:
/// ```
/// ```text
/// GT_POSTGRES_ENDPOINTS=localhost:9092,localhost:9093
/// ```
macro_rules! maybe_skip_postgres_integration_test {
@@ -276,7 +276,7 @@ macro_rules! maybe_skip_postgres_integration_test {
/// Skip the test if the environment variable `GT_MYSQL_ENDPOINTS` is not set.
///
/// The format of the environment variable is:
/// ```
/// ```text
/// GT_MYSQL_ENDPOINTS=localhost:9092,localhost:9093
/// ```
macro_rules! maybe_skip_mysql_integration_test {
@@ -287,3 +287,19 @@ macro_rules! maybe_skip_mysql_integration_test {
}
};
}
#[macro_export]
/// Skip the test if the environment variable `GT_POSTGRES15_ENDPOINTS` is not set.
///
/// The format of the environment variable is:
/// ```text
/// GT_POSTGRES15_ENDPOINTS=postgres://user:password@127.0.0.1:5433/postgres
/// ```
macro_rules! maybe_skip_postgres15_integration_test {
() => {
if std::env::var("GT_POSTGRES15_ENDPOINTS").is_err() {
common_telemetry::warn!("The PG15 endpoints is empty, skipping the test");
return;
}
};
}
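A hypothetical test using the same skip pattern, spelled out inline so the snippet stands alone; the connection logic is assumed, only the guard matters:

// Hypothetical integration test following the skip pattern of the macro above.
#[tokio::test]
async fn test_postgres15_kv_backend() {
    if std::env::var("GT_POSTGRES15_ENDPOINTS").is_err() {
        eprintln!("The PG15 endpoints are empty, skipping the test");
        return;
    }
    let endpoints = std::env::var("GT_POSTGRES15_ENDPOINTS").unwrap();
    // ... connect to `endpoints` and exercise the PostgreSQL 15 kv backend ...
    assert!(!endpoints.is_empty());
}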

View File

@@ -27,7 +27,7 @@ use snafu::{ensure, ResultExt};
use store_api::storage::{RegionId, RegionNumber};
use crate::error::{EncodeWalOptionsSnafu, InvalidTopicNamePrefixSnafu, Result};
use crate::key::NAME_PATTERN_REGEX;
use crate::key::TOPIC_NAME_PATTERN_REGEX;
use crate::kv_backend::KvBackendRef;
use crate::leadership_notifier::LeadershipChangeListener;
pub use crate::wal_options_allocator::topic_creator::{
@@ -109,7 +109,7 @@ pub async fn build_wal_options_allocator(
MetasrvWalConfig::Kafka(kafka_config) => {
let prefix = &kafka_config.kafka_topic.topic_name_prefix;
ensure!(
NAME_PATTERN_REGEX.is_match(prefix),
TOPIC_NAME_PATTERN_REGEX.is_match(prefix),
InvalidTopicNamePrefixSnafu { prefix }
);
let topic_creator =
@@ -149,6 +149,26 @@ pub fn prepare_wal_options(
}
}
/// Extracts the topic from the wal options.
pub fn extract_topic_from_wal_options(
region_id: RegionId,
region_options: &HashMap<RegionNumber, String>,
) -> Option<String> {
region_options
.get(&region_id.region_number())
.and_then(|wal_options| {
serde_json::from_str::<WalOptions>(wal_options)
.ok()
.and_then(|wal_options| {
if let WalOptions::Kafka(kafka_wal_option) = wal_options {
Some(kafka_wal_option.topic)
} else {
None
}
})
})
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
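The helper above deserializes the region's WAL options and returns the Kafka topic, if any. A rough standalone approximation using `serde_json` directly, with an illustrative JSON shape rather than the real `WalOptions` encoding:

use std::collections::HashMap;

// Minimal stand-in: region numbers map to serialized wal options, and only a
// Kafka-style payload carries a "topic" field.
fn extract_topic(region_number: u32, region_options: &HashMap<u32, String>) -> Option<String> {
    let raw = region_options.get(&region_number)?;
    let value: serde_json::Value = serde_json::from_str(raw).ok()?;
    value.get("topic")?.as_str().map(|s| s.to_string())
}

fn main() {
    let mut opts = HashMap::new();
    opts.insert(
        1u32,
        r#"{"wal.provider":"kafka","topic":"greptimedb_wal_topic_0"}"#.to_string(),
    );
    assert_eq!(extract_topic(1, &opts), Some("greptimedb_wal_topic_0".to_string()));
    assert_eq!(extract_topic(2, &opts), None);
}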

View File

@@ -25,8 +25,7 @@ use snafu::ResultExt;
use crate::error::{
BuildKafkaClientSnafu, BuildKafkaCtrlClientSnafu, CreateKafkaWalTopicSnafu,
KafkaGetOffsetSnafu, KafkaPartitionClientSnafu, ProduceRecordSnafu, ResolveKafkaEndpointSnafu,
Result, TlsConfigSnafu,
KafkaGetOffsetSnafu, KafkaPartitionClientSnafu, ProduceRecordSnafu, Result, TlsConfigSnafu,
};
// Each topic only has one partition for now.
@@ -209,10 +208,8 @@ impl KafkaTopicCreator {
/// Builds a kafka [Client](rskafka::client::Client).
pub async fn build_kafka_client(connection: &KafkaConnectionConfig) -> Result<Client> {
// Builds a Kafka controller client for creating topics.
let broker_endpoints = common_wal::resolve_to_ipv4(&connection.broker_endpoints)
.await
.context(ResolveKafkaEndpointSnafu)?;
let mut builder = ClientBuilder::new(broker_endpoints).backoff_config(DEFAULT_BACKOFF_CONFIG);
let mut builder = ClientBuilder::new(connection.broker_endpoints.clone())
.backoff_config(DEFAULT_BACKOFF_CONFIG);
if let Some(sasl) = &connection.sasl {
builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
};

View File

@@ -26,7 +26,6 @@ serde.workspace = true
snafu.workspace = true
sqlparser.workspace = true
sqlparser_derive = "0.1"
statrs = "0.16"
store-api.workspace = true
[dev-dependencies]

View File

@@ -28,7 +28,6 @@ parking_lot.workspace = true
paste.workspace = true
pin-project.workspace = true
prometheus.workspace = true
rand.workspace = true
ratelimit.workspace = true
serde.workspace = true
serde_json.workspace = true

View File

@@ -6,7 +6,6 @@ license.workspace = true
[dependencies]
common-base.workspace = true
common-datasource.workspace = true
common-decimal.workspace = true
common-error.workspace = true
common-macro.workspace = true

View File

@@ -20,7 +20,8 @@ use std::time::Duration;
use serde::{Deserialize, Serialize};
use crate::config::kafka::common::{
DEFAULT_AUTO_PRUNE_INTERVAL, DEFAULT_AUTO_PRUNE_PARALLELISM, DEFAULT_FLUSH_TRIGGER_SIZE,
DEFAULT_AUTO_PRUNE_INTERVAL, DEFAULT_AUTO_PRUNE_PARALLELISM, DEFAULT_CHECKPOINT_TRIGGER_SIZE,
DEFAULT_FLUSH_TRIGGER_SIZE,
};
use crate::config::kafka::{DatanodeKafkaConfig, MetasrvKafkaConfig};
use crate::config::raft_engine::RaftEngineConfig;
@@ -64,6 +65,8 @@ impl From<DatanodeWalConfig> for MetasrvWalConfig {
auto_prune_parallelism: DEFAULT_AUTO_PRUNE_PARALLELISM,
// This field won't be used in standalone mode
flush_trigger_size: DEFAULT_FLUSH_TRIGGER_SIZE,
// This field won't be used in standalone mode
checkpoint_trigger_size: DEFAULT_CHECKPOINT_TRIGGER_SIZE,
}),
}
}
@@ -205,9 +208,10 @@ mod tests {
create_topic_timeout: Duration::from_secs(30),
},
auto_create_topics: true,
auto_prune_interval: Duration::from_secs(0),
auto_prune_interval: Duration::from_mins(30),
auto_prune_parallelism: 10,
flush_trigger_size: ReadableSize::mb(512),
checkpoint_trigger_size: ReadableSize::mb(128),
};
assert_eq!(metasrv_wal_config, MetasrvWalConfig::Kafka(expected));

View File

@@ -37,11 +37,13 @@ pub const DEFAULT_BACKOFF_CONFIG: BackoffConfig = BackoffConfig {
};
/// Default interval for auto WAL pruning.
pub const DEFAULT_AUTO_PRUNE_INTERVAL: Duration = Duration::ZERO;
pub const DEFAULT_AUTO_PRUNE_INTERVAL: Duration = Duration::from_mins(30);
/// Default limit for concurrent auto pruning tasks.
pub const DEFAULT_AUTO_PRUNE_PARALLELISM: usize = 10;
/// Default size of WAL to trigger flush.
pub const DEFAULT_FLUSH_TRIGGER_SIZE: ReadableSize = ReadableSize::mb(512);
/// Default checkpoint trigger size.
pub const DEFAULT_CHECKPOINT_TRIGGER_SIZE: ReadableSize = ReadableSize::mb(128);
use crate::error::{self, Result};
use crate::{TopicSelectorType, BROKER_ENDPOINT, TOPIC_NAME_PREFIX};
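Note that `Duration::from_mins` is a nightly constructor, which is why `#![feature(duration_constructors_lite)]` is enabled elsewhere in this change; on stable Rust the same defaults can be written out explicitly. A quick sanity check of the values:

use std::time::Duration;

fn main() {
    // Stable-Rust spelling of the 30-minute pruning default;
    // Duration::from_mins(30) on nightly produces the same value.
    let auto_prune_interval = Duration::from_secs(30 * 60);
    // The 128 MiB checkpoint trigger, expressed in bytes for comparison.
    let checkpoint_trigger_bytes: u64 = 128 * 1024 * 1024;
    assert_eq!(auto_prune_interval.as_secs(), 1800);
    assert_eq!(checkpoint_trigger_bytes, 134_217_728);
}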

View File

@@ -19,7 +19,7 @@ use serde::{Deserialize, Serialize};
use crate::config::kafka::common::{
KafkaConnectionConfig, KafkaTopicConfig, DEFAULT_AUTO_PRUNE_INTERVAL,
DEFAULT_AUTO_PRUNE_PARALLELISM, DEFAULT_FLUSH_TRIGGER_SIZE,
DEFAULT_AUTO_PRUNE_PARALLELISM, DEFAULT_CHECKPOINT_TRIGGER_SIZE, DEFAULT_FLUSH_TRIGGER_SIZE,
};
/// Kafka wal configurations for metasrv.
@@ -41,6 +41,8 @@ pub struct MetasrvKafkaConfig {
pub auto_prune_parallelism: usize,
// The size of WAL to trigger flush.
pub flush_trigger_size: ReadableSize,
// The checkpoint trigger size.
pub checkpoint_trigger_size: ReadableSize,
}
impl Default for MetasrvKafkaConfig {
@@ -52,6 +54,7 @@ impl Default for MetasrvKafkaConfig {
auto_prune_interval: DEFAULT_AUTO_PRUNE_INTERVAL,
auto_prune_parallelism: DEFAULT_AUTO_PRUNE_PARALLELISM,
flush_trigger_size: DEFAULT_FLUSH_TRIGGER_SIZE,
checkpoint_trigger_size: DEFAULT_CHECKPOINT_TRIGGER_SIZE,
}
}
}

View File

@@ -13,6 +13,7 @@
// limitations under the License.
#![feature(assert_matches)]
#![feature(duration_constructors_lite)]
use std::net::SocketAddr;

View File

@@ -8,6 +8,5 @@ license.workspace = true
workspace = true
[dependencies]
api.workspace = true
common-telemetry.workspace = true
serde.workspace = true

View File

@@ -23,18 +23,15 @@ use common_error::ext::BoxedError;
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
use common_meta::cache::{LayeredCacheRegistry, SchemaCacheRef, TableSchemaCacheRef};
use common_meta::datanode::TopicStatsReporter;
use common_meta::key::datanode_table::{DatanodeTableManager, DatanodeTableValue};
use common_meta::key::runtime_switch::RuntimeSwitchManager;
use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
use common_meta::wal_options_allocator::prepare_wal_options;
pub use common_procedure::options::ProcedureConfig;
use common_telemetry::{error, info, warn};
use common_wal::config::kafka::DatanodeKafkaConfig;
use common_wal::config::raft_engine::RaftEngineConfig;
use common_wal::config::DatanodeWalConfig;
use file_engine::engine::FileRegionEngine;
use futures_util::TryStreamExt;
use log_store::kafka::log_store::KafkaLogStore;
use log_store::kafka::{default_index_file, GlobalIndexCollector};
use log_store::raft_engine::log_store::RaftEngineLogStore;
@@ -49,10 +46,8 @@ use query::QueryEngineFactory;
use servers::export_metrics::ExportMetricsTask;
use servers::server::ServerHandlers;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::path_utils::{table_dir, WAL_DIR};
use store_api::path_utils::WAL_DIR;
use store_api::region_engine::{RegionEngineRef, RegionRole};
use store_api::region_request::{PathType, RegionOpenRequest};
use store_api::storage::RegionId;
use tokio::fs;
use tokio::sync::Notify;
@@ -70,6 +65,7 @@ use crate::greptimedb_telemetry::get_greptimedb_telemetry_task;
use crate::heartbeat::HeartbeatTask;
use crate::region_server::{DummyTableProviderFactory, RegionServer};
use crate::store::{self, new_object_store_without_cache};
use crate::utils::{build_region_open_requests, RegionOpenRequests};
/// Datanode service.
pub struct Datanode {
@@ -252,16 +248,12 @@ impl DatanodeBuilder {
.recovery_mode()
.await
.context(GetMetadataSnafu)?;
let datanode_table_manager = DatanodeTableManager::new(self.kv_backend.clone());
let table_values = datanode_table_manager
.tables(node_id)
.try_collect::<Vec<_>>()
.await
.context(GetMetadataSnafu)?;
let region_open_requests =
build_region_open_requests(node_id, self.kv_backend.clone()).await?;
let open_all_regions = open_all_regions(
region_server.clone(),
table_values,
region_open_requests,
!controlled_by_metasrv,
self.opts.init_regions_parallelism,
// Ignore nonexistent regions in recovery mode.
@@ -342,27 +334,22 @@ impl DatanodeBuilder {
async fn initialize_region_server(
&self,
region_server: &RegionServer,
kv_backend: KvBackendRef,
open_with_writable: bool,
) -> Result<()> {
let node_id = self.opts.node_id.context(MissingNodeIdSnafu)?;
let runtime_switch_manager = RuntimeSwitchManager::new(kv_backend.clone());
// TODO(weny): Consider introducing a readonly kv_backend trait.
let runtime_switch_manager = RuntimeSwitchManager::new(self.kv_backend.clone());
let is_recovery_mode = runtime_switch_manager
.recovery_mode()
.await
.context(GetMetadataSnafu)?;
let datanode_table_manager = DatanodeTableManager::new(kv_backend.clone());
let table_values = datanode_table_manager
.tables(node_id)
.try_collect::<Vec<_>>()
.await
.context(GetMetadataSnafu)?;
let region_open_requests =
build_region_open_requests(node_id, self.kv_backend.clone()).await?;
open_all_regions(
region_server.clone(),
table_values,
region_open_requests,
open_with_writable,
self.opts.init_regions_parallelism,
is_recovery_mode,
@@ -609,73 +596,24 @@ impl DatanodeBuilder {
/// Open all regions belong to this datanode.
async fn open_all_regions(
region_server: RegionServer,
table_values: Vec<DatanodeTableValue>,
region_open_requests: RegionOpenRequests,
open_with_writable: bool,
init_regions_parallelism: usize,
ignore_nonexistent_region: bool,
) -> Result<()> {
let mut regions = vec![];
#[cfg(feature = "enterprise")]
let mut follower_regions = vec![];
for table_value in table_values {
for region_number in table_value.regions {
// Augments region options with wal options if a wal options is provided.
let mut region_options = table_value.region_info.region_options.clone();
prepare_wal_options(
&mut region_options,
RegionId::new(table_value.table_id, region_number),
&table_value.region_info.region_wal_options,
);
regions.push((
RegionId::new(table_value.table_id, region_number),
table_value.region_info.engine.clone(),
table_value.region_info.region_storage_path.clone(),
region_options,
));
}
let RegionOpenRequests {
leader_regions,
#[cfg(feature = "enterprise")]
for region_number in table_value.follower_regions {
// Augments region options with wal options if a wal options is provided.
let mut region_options = table_value.region_info.region_options.clone();
prepare_wal_options(
&mut region_options,
RegionId::new(table_value.table_id, region_number),
&table_value.region_info.region_wal_options,
);
follower_regions.push((
RegionId::new(table_value.table_id, region_number),
table_value.region_info.engine.clone(),
table_value.region_info.region_storage_path.clone(),
region_options,
));
}
}
let num_regions = regions.len();
info!("going to open {} region(s)", num_regions);
let mut region_requests = Vec::with_capacity(regions.len());
for (region_id, engine, store_path, options) in regions {
let table_dir = table_dir(&store_path, region_id.table_id());
region_requests.push((
region_id,
RegionOpenRequest {
engine,
table_dir,
path_type: PathType::Bare,
options,
skip_wal_replay: false,
},
));
}
follower_regions,
} = region_open_requests;
let leader_region_num = leader_regions.len();
info!("going to open {} region(s)", leader_region_num);
let now = Instant::now();
let open_regions = region_server
.handle_batch_open_requests(
init_regions_parallelism,
region_requests,
leader_regions,
ignore_nonexistent_region,
)
.await?;
@@ -686,19 +624,19 @@ async fn open_all_regions(
);
if !ignore_nonexistent_region {
ensure!(
open_regions.len() == num_regions,
open_regions.len() == leader_region_num,
error::UnexpectedSnafu {
violated: format!(
"Expected to open {} of regions, only {} of regions has opened",
num_regions,
leader_region_num,
open_regions.len()
)
}
);
} else if open_regions.len() != num_regions {
} else if open_regions.len() != leader_region_num {
warn!(
"ignore nonexistent region, expected to open {} of regions, only {} of regions has opened",
num_regions,
leader_region_num,
open_regions.len()
);
}
@@ -717,31 +655,14 @@ async fn open_all_regions(
if !follower_regions.is_empty() {
use tokio::time::Instant;
info!(
"going to open {} follower region(s)",
follower_regions.len()
);
let mut region_requests = Vec::with_capacity(follower_regions.len());
let num_regions = follower_regions.len();
for (region_id, engine, store_path, options) in follower_regions {
let table_dir = table_dir(&store_path, region_id.table_id());
region_requests.push((
region_id,
RegionOpenRequest {
engine,
table_dir,
path_type: PathType::Bare,
options,
skip_wal_replay: true,
},
));
}
let follower_region_num = follower_regions.len();
info!("going to open {} follower region(s)", follower_region_num);
let now = Instant::now();
let open_regions = region_server
.handle_batch_open_requests(
init_regions_parallelism,
region_requests,
follower_regions,
ignore_nonexistent_region,
)
.await?;
@@ -753,19 +674,19 @@ async fn open_all_regions(
if !ignore_nonexistent_region {
ensure!(
open_regions.len() == num_regions,
open_regions.len() == follower_region_num,
error::UnexpectedSnafu {
violated: format!(
"Expected to open {} of follower regions, only {} of regions has opened",
num_regions,
follower_region_num,
open_regions.len()
)
}
);
} else if open_regions.len() != num_regions {
} else if open_regions.len() != follower_region_num {
warn!(
"ignore nonexistent region, expected to open {} of follower regions, only {} of regions has opened",
num_regions,
follower_region_num,
open_regions.len()
);
}
@@ -835,15 +756,13 @@ mod tests {
..Default::default()
},
Plugins::default(),
kv_backend,
kv_backend.clone(),
);
builder.with_cache_registry(layered_cache_registry);
let kv = Arc::new(MemoryKvBackend::default()) as _;
setup_table_datanode(&kv).await;
setup_table_datanode(&(kv_backend as _)).await;
builder
.initialize_region_server(&mock_region_server, kv.clone(), false)
.initialize_region_server(&mock_region_server, false)
.await
.unwrap();

View File

@@ -238,10 +238,7 @@ mod tests {
// Upgrade region
let instruction = Instruction::UpgradeRegion(UpgradeRegion {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
replay_timeout: None,
location_id: None,
..Default::default()
});
assert!(
heartbeat_handler.is_acceptable(&heartbeat_env.create_handler_ctx((meta, instruction)))

View File

@@ -40,6 +40,7 @@ impl HandlerContext {
path_type: PathType::Bare,
options: region_options,
skip_wal_replay,
checkpoint: None,
});
let result = self.region_server.handle_request(region_id, request).await;
let success = result.is_ok();

View File

@@ -15,7 +15,7 @@
use common_meta::instruction::{InstructionReply, UpgradeRegion, UpgradeRegionReply};
use common_telemetry::{info, warn};
use futures_util::future::BoxFuture;
use store_api::region_request::{RegionCatchupRequest, RegionRequest};
use store_api::region_request::{RegionCatchupRequest, RegionRequest, ReplayCheckpoint};
use crate::heartbeat::handler::HandlerContext;
use crate::heartbeat::task_tracker::WaitResult;
@@ -29,6 +29,8 @@ impl HandlerContext {
metadata_last_entry_id,
replay_timeout,
location_id,
replay_entry_id,
metadata_replay_entry_id,
}: UpgradeRegion,
) -> BoxFuture<'static, Option<InstructionReply>> {
Box::pin(async move {
@@ -50,6 +52,14 @@ impl HandlerContext {
let region_server_moved = self.region_server.clone();
let checkpoint = match (replay_entry_id, metadata_replay_entry_id) {
(Some(entry_id), metadata_entry_id) => Some(ReplayCheckpoint {
entry_id,
metadata_entry_id,
}),
_ => None,
};
// The catchup task is almost zero cost if the inside region is writable.
// Therefore, it always registers a new catchup task.
let register_result = self
@@ -66,6 +76,7 @@ impl HandlerContext {
entry_id: last_entry_id,
metadata_entry_id: metadata_last_entry_id,
location_id,
checkpoint,
}),
)
.await?;
@@ -148,10 +159,8 @@ mod tests {
.clone()
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
replay_timeout,
location_id: None,
..Default::default()
})
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
@@ -187,10 +196,8 @@ mod tests {
.clone()
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
replay_timeout,
location_id: None,
..Default::default()
})
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
@@ -227,10 +234,8 @@ mod tests {
.clone()
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
replay_timeout,
location_id: None,
..Default::default()
})
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
@@ -271,9 +276,7 @@ mod tests {
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
replay_timeout,
last_entry_id: None,
metadata_last_entry_id: None,
location_id: None,
..Default::default()
})
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
@@ -289,10 +292,8 @@ mod tests {
let reply = handler_context
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
replay_timeout: Some(Duration::from_millis(500)),
location_id: None,
..Default::default()
})
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
@@ -332,10 +333,7 @@ mod tests {
.clone()
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
replay_timeout: None,
location_id: None,
..Default::default()
})
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
@@ -351,10 +349,8 @@ mod tests {
.clone()
.handle_upgrade_region_instruction(UpgradeRegion {
region_id,
last_entry_id: None,
metadata_last_entry_id: None,
replay_timeout: Some(Duration::from_millis(200)),
location_id: None,
..Default::default()
})
.await;
assert_matches!(reply, Some(InstructionReply::UpgradeRegion(_)));
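A checkpoint is built only when the data region's replay entry id is known; the metadata entry id is carried along optionally. The rule in isolation, with `ReplayCheckpoint` stubbed locally:

#[derive(Debug, PartialEq)]
struct ReplayCheckpoint {
    entry_id: u64,
    metadata_entry_id: Option<u64>,
}

fn build_checkpoint(replay: Option<u64>, metadata_replay: Option<u64>) -> Option<ReplayCheckpoint> {
    match (replay, metadata_replay) {
        (Some(entry_id), metadata_entry_id) => Some(ReplayCheckpoint { entry_id, metadata_entry_id }),
        // No data-region entry id means no checkpoint at all,
        // even if a metadata entry id was supplied.
        _ => None,
    }
}

fn main() {
    assert_eq!(build_checkpoint(None, Some(7)), None);
    assert_eq!(
        build_checkpoint(Some(42), None),
        Some(ReplayCheckpoint { entry_id: 42, metadata_entry_id: None })
    );
}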

View File

@@ -28,3 +28,4 @@ pub mod service;
pub mod store;
#[cfg(any(test, feature = "testing"))]
pub mod tests;
pub mod utils;

View File

@@ -1410,6 +1410,7 @@ mod tests {
path_type: PathType::Bare,
options: Default::default(),
skip_wal_replay: false,
checkpoint: None,
}),
)
.await
@@ -1579,6 +1580,7 @@ mod tests {
path_type: PathType::Bare,
options: Default::default(),
skip_wal_replay: false,
checkpoint: None,
},
),
(
@@ -1589,6 +1591,7 @@ mod tests {
path_type: PathType::Bare,
options: Default::default(),
skip_wal_replay: false,
checkpoint: None,
},
),
],
@@ -1610,6 +1613,7 @@ mod tests {
path_type: PathType::Bare,
options: Default::default(),
skip_wal_replay: false,
checkpoint: None,
},
),
(
@@ -1620,6 +1624,7 @@ mod tests {
path_type: PathType::Bare,
options: Default::default(),
skip_wal_replay: false,
checkpoint: None,
},
),
],

src/datanode/src/utils.rs (new file, 188 lines)
View File

@@ -0,0 +1,188 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use common_meta::key::datanode_table::DatanodeTableManager;
use common_meta::key::topic_region::{TopicRegionKey, TopicRegionManager, TopicRegionValue};
use common_meta::kv_backend::KvBackendRef;
use common_meta::wal_options_allocator::{extract_topic_from_wal_options, prepare_wal_options};
use common_meta::DatanodeId;
use futures::TryStreamExt;
use snafu::ResultExt;
use store_api::path_utils::table_dir;
use store_api::region_request::{PathType, RegionOpenRequest, ReplayCheckpoint};
use store_api::storage::{RegionId, RegionNumber};
use tracing::info;
use crate::error::{GetMetadataSnafu, Result};
/// The requests to open regions.
pub(crate) struct RegionOpenRequests {
pub leader_regions: Vec<(RegionId, RegionOpenRequest)>,
#[cfg(feature = "enterprise")]
pub follower_regions: Vec<(RegionId, RegionOpenRequest)>,
}
fn group_region_by_topic(
region_id: RegionId,
region_options: &HashMap<RegionNumber, String>,
topic_regions: &mut HashMap<String, Vec<RegionId>>,
) {
if let Some(topic) = extract_topic_from_wal_options(region_id, region_options) {
topic_regions.entry(topic).or_default().push(region_id);
}
}
fn get_replay_checkpoint(
region_id: RegionId,
topic_region_values: &Option<HashMap<RegionId, TopicRegionValue>>,
) -> Option<ReplayCheckpoint> {
let topic_region_values = topic_region_values.as_ref()?;
let topic_region_value = topic_region_values.get(&region_id);
let replay_checkpoint = topic_region_value.and_then(|value| value.checkpoint);
replay_checkpoint.map(|checkpoint| ReplayCheckpoint {
entry_id: checkpoint.entry_id,
metadata_entry_id: checkpoint.metadata_entry_id,
})
}
pub(crate) async fn build_region_open_requests(
node_id: DatanodeId,
kv_backend: KvBackendRef,
) -> Result<RegionOpenRequests> {
let datanode_table_manager = DatanodeTableManager::new(kv_backend.clone());
let table_values = datanode_table_manager
.tables(node_id)
.try_collect::<Vec<_>>()
.await
.context(GetMetadataSnafu)?;
let topic_region_manager = TopicRegionManager::new(kv_backend);
let mut topic_regions = HashMap::<String, Vec<RegionId>>::new();
let mut regions = vec![];
#[cfg(feature = "enterprise")]
let mut follower_regions = vec![];
for table_value in table_values {
for region_number in table_value.regions {
let region_id = RegionId::new(table_value.table_id, region_number);
// Augments region options with wal options if wal options are provided.
let mut region_options = table_value.region_info.region_options.clone();
prepare_wal_options(
&mut region_options,
region_id,
&table_value.region_info.region_wal_options,
);
group_region_by_topic(
region_id,
&table_value.region_info.region_wal_options,
&mut topic_regions,
);
regions.push((
region_id,
table_value.region_info.engine.clone(),
table_value.region_info.region_storage_path.clone(),
region_options,
));
}
#[cfg(feature = "enterprise")]
for region_number in table_value.follower_regions {
let region_id = RegionId::new(table_value.table_id, region_number);
// Augments region options with wal options if wal options are provided.
let mut region_options = table_value.region_info.region_options.clone();
prepare_wal_options(
&mut region_options,
RegionId::new(table_value.table_id, region_number),
&table_value.region_info.region_wal_options,
);
group_region_by_topic(
region_id,
&table_value.region_info.region_wal_options,
&mut topic_regions,
);
follower_regions.push((
RegionId::new(table_value.table_id, region_number),
table_value.region_info.engine.clone(),
table_value.region_info.region_storage_path.clone(),
region_options,
));
}
}
let topic_region_values = if !topic_regions.is_empty() {
let keys = topic_regions
.iter()
.flat_map(|(topic, regions)| {
regions
.iter()
.map(|region_id| TopicRegionKey::new(*region_id, topic))
})
.collect::<Vec<_>>();
let topic_region_manager = topic_region_manager
.batch_get(keys)
.await
.context(GetMetadataSnafu)?;
Some(topic_region_manager)
} else {
None
};
let mut leader_region_requests = Vec::with_capacity(regions.len());
for (region_id, engine, store_path, options) in regions {
let table_dir = table_dir(&store_path, region_id.table_id());
let checkpoint = get_replay_checkpoint(region_id, &topic_region_values);
info!("region_id: {}, checkpoint: {:?}", region_id, checkpoint);
leader_region_requests.push((
region_id,
RegionOpenRequest {
engine,
table_dir,
path_type: PathType::Bare,
options,
skip_wal_replay: false,
checkpoint,
},
));
}
#[cfg(feature = "enterprise")]
let follower_region_requests = {
let mut follower_region_requests = Vec::with_capacity(follower_regions.len());
for (region_id, engine, store_path, options) in follower_regions {
let table_dir = table_dir(&store_path, region_id.table_id());
follower_region_requests.push((
region_id,
RegionOpenRequest {
engine,
table_dir,
path_type: PathType::Bare,
options,
skip_wal_replay: true,
checkpoint: None,
},
));
}
follower_region_requests
};
Ok(RegionOpenRequests {
leader_regions: leader_region_requests,
#[cfg(feature = "enterprise")]
follower_regions: follower_region_requests,
})
}
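The per-topic grouping built here is just a multi-map keyed by the region's WAL topic; regions without a Kafka topic are left out. A stripped-down demonstration with strings standing in for `RegionId`:

use std::collections::HashMap;

// Group regions under the Kafka topic their WAL lives in; regions without a
// Kafka topic (e.g. raft-engine WAL) are simply not grouped.
fn group_by_topic(regions: &[(&str, Option<&str>)]) -> HashMap<String, Vec<String>> {
    let mut topic_regions: HashMap<String, Vec<String>> = HashMap::new();
    for (region, topic) in regions {
        if let Some(topic) = topic {
            topic_regions
                .entry((*topic).to_string())
                .or_default()
                .push((*region).to_string());
        }
    }
    topic_regions
}

fn main() {
    let grouped = group_by_topic(&[
        ("region-0", Some("wal_topic_0")),
        ("region-1", Some("wal_topic_0")),
        ("region-2", None),
    ]);
    assert_eq!(grouped["wal_topic_0"].len(), 2);
    assert_eq!(grouped.len(), 1);
}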

View File

@@ -13,11 +13,11 @@
// limitations under the License.
use std::any::Any;
use std::fmt;
use std::sync::Arc;
use arrow::array::Array;
use arrow::datatypes::Int64Type;
use arrow_array::{ArrayRef, DictionaryArray, Int64Array};
use arrow::array::{Array, ArrayRef, DictionaryArray, PrimitiveArray, PrimitiveBuilder};
use arrow::datatypes::{ArrowDictionaryKeyType, ArrowNativeType};
use serde_json::Value as JsonValue;
use snafu::ResultExt;
@@ -30,34 +30,55 @@ use crate::vectors::operations::VectorOp;
use crate::vectors::{self, Helper, Validity, Vector, VectorRef};
/// Vector of dictionaries, basically backed by Arrow's `DictionaryArray`.
#[derive(Debug, PartialEq)]
pub struct DictionaryVector {
array: DictionaryArray<Int64Type>,
pub struct DictionaryVector<K: ArrowDictionaryKeyType> {
array: DictionaryArray<K>,
/// The datatype of the keys in the dictionary.
key_type: ConcreteDataType,
/// The datatype of the items in the dictionary.
item_type: ConcreteDataType,
/// The vector of items in the dictionary.
item_vector: VectorRef,
}
impl DictionaryVector {
impl<K: ArrowDictionaryKeyType> fmt::Debug for DictionaryVector<K> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("DictionaryVector")
.field("array", &self.array)
.field("key_type", &self.key_type)
.field("item_type", &self.item_type)
.finish()
}
}
impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryVector<K> {
fn eq(&self, other: &DictionaryVector<K>) -> bool {
self.array == other.array
&& self.key_type == other.key_type
&& self.item_type == other.item_type
}
}
impl<K: ArrowDictionaryKeyType> DictionaryVector<K> {
/// Create a new instance of `DictionaryVector` from a dictionary array and item type
pub fn new(array: DictionaryArray<Int64Type>, item_type: ConcreteDataType) -> Result<Self> {
pub fn new(array: DictionaryArray<K>, item_type: ConcreteDataType) -> Result<Self> {
let key_type = ConcreteDataType::try_from(&K::DATA_TYPE)?;
let item_vector = Helper::try_into_vector(array.values())?;
Ok(Self {
array,
key_type,
item_type,
item_vector,
})
}
/// Returns the underlying Arrow dictionary array
pub fn array(&self) -> &DictionaryArray<Int64Type> {
pub fn array(&self) -> &DictionaryArray<K> {
&self.array
}
/// Returns the keys array of this dictionary
pub fn keys(&self) -> &arrow_array::PrimitiveArray<Int64Type> {
pub fn keys(&self) -> &arrow_array::PrimitiveArray<K> {
self.array.keys()
}
@@ -71,10 +92,10 @@ impl DictionaryVector {
}
}
impl Vector for DictionaryVector {
impl<K: ArrowDictionaryKeyType> Vector for DictionaryVector<K> {
fn data_type(&self) -> ConcreteDataType {
ConcreteDataType::Dictionary(DictionaryType::new(
ConcreteDataType::int64_datatype(),
self.key_type.clone(),
self.item_type.clone(),
))
}
@@ -118,6 +139,7 @@ impl Vector for DictionaryVector {
fn slice(&self, offset: usize, length: usize) -> VectorRef {
Arc::new(Self {
array: self.array.slice(offset, length),
key_type: self.key_type.clone(),
item_type: self.item_type.clone(),
item_vector: self.item_vector.clone(),
})
@@ -129,7 +151,7 @@ impl Vector for DictionaryVector {
}
let key = self.array.keys().value(index);
self.item_vector.get(key as usize)
self.item_vector.get(key.as_usize())
}
fn get_ref(&self, index: usize) -> ValueRef {
@@ -138,11 +160,11 @@ impl Vector for DictionaryVector {
}
let key = self.array.keys().value(index);
self.item_vector.get_ref(key as usize)
self.item_vector.get_ref(key.as_usize())
}
}
impl Serializable for DictionaryVector {
impl<K: ArrowDictionaryKeyType> Serializable for DictionaryVector<K> {
fn serialize_to_json(&self) -> Result<Vec<JsonValue>> {
// Convert the dictionary array to JSON, where each element is either null or
// the value it refers to in the dictionary
@@ -153,7 +175,7 @@ impl Serializable for DictionaryVector {
result.push(JsonValue::Null);
} else {
let key = self.array.keys().value(i);
let value = self.item_vector.get(key as usize);
let value = self.item_vector.get(key.as_usize());
let json_value = serde_json::to_value(value).context(error::SerializeSnafu)?;
result.push(json_value);
}
@@ -163,33 +185,35 @@ impl Serializable for DictionaryVector {
}
}
impl TryFrom<DictionaryArray<Int64Type>> for DictionaryVector {
impl<K: ArrowDictionaryKeyType> TryFrom<DictionaryArray<K>> for DictionaryVector<K> {
type Error = crate::error::Error;
fn try_from(array: DictionaryArray<Int64Type>) -> Result<Self> {
let item_type = ConcreteDataType::from_arrow_type(array.values().data_type());
fn try_from(array: DictionaryArray<K>) -> Result<Self> {
let key_type = ConcreteDataType::try_from(array.keys().data_type())?;
let item_type = ConcreteDataType::try_from(array.values().data_type())?;
let item_vector = Helper::try_into_vector(array.values())?;
Ok(Self {
array,
key_type,
item_type,
item_vector,
})
}
}
pub struct DictionaryIter<'a> {
vector: &'a DictionaryVector,
pub struct DictionaryIter<'a, K: ArrowDictionaryKeyType> {
vector: &'a DictionaryVector<K>,
idx: usize,
}
impl<'a> DictionaryIter<'a> {
pub fn new(vector: &'a DictionaryVector) -> DictionaryIter<'a> {
impl<'a, K: ArrowDictionaryKeyType> DictionaryIter<'a, K> {
pub fn new(vector: &'a DictionaryVector<K>) -> DictionaryIter<'a, K> {
DictionaryIter { vector, idx: 0 }
}
}
impl<'a> Iterator for DictionaryIter<'a> {
impl<'a, K: ArrowDictionaryKeyType> Iterator for DictionaryIter<'a, K> {
type Item = Option<ValueRef<'a>>;
#[inline]
@@ -205,7 +229,7 @@ impl<'a> Iterator for DictionaryIter<'a> {
return Some(None);
}
Some(Some(self.vector.item_vector.get_ref(self.idx)))
Some(Some(self.vector.get_ref(idx)))
}
#[inline]
@@ -217,10 +241,10 @@ impl<'a> Iterator for DictionaryIter<'a> {
}
}
impl VectorOp for DictionaryVector {
impl<K: ArrowDictionaryKeyType> VectorOp for DictionaryVector<K> {
fn replicate(&self, offsets: &[usize]) -> VectorRef {
let keys = self.array.keys();
let mut replicated_keys = Vec::with_capacity(offsets.len());
let mut replicated_keys = PrimitiveBuilder::new();
let mut previous_offset = 0;
for (i, &offset) in offsets.iter().enumerate() {
@@ -236,19 +260,20 @@ impl VectorOp for DictionaryVector {
// repeat this key (offset - previous_offset) times
let repeat_count = offset - previous_offset;
if repeat_count > 0 {
replicated_keys.resize(replicated_keys.len() + repeat_count, key);
for _ in 0..repeat_count {
replicated_keys.append_option(key);
}
previous_offset = offset;
}
let new_keys = Int64Array::from(replicated_keys);
let new_keys = replicated_keys.finish();
let new_array = DictionaryArray::try_new(new_keys, self.values().clone())
.expect("Failed to create replicated dictionary array");
Arc::new(Self {
array: new_array,
key_type: self.key_type.clone(),
item_type: self.item_type.clone(),
item_vector: self.item_vector.clone(),
})
@@ -261,7 +286,7 @@ impl VectorOp for DictionaryVector {
let filtered_key_array = filtered_key_vector.to_arrow_array();
let filtered_key_array = filtered_key_array
.as_any()
.downcast_ref::<Int64Array>()
.downcast_ref::<PrimitiveArray<K>>()
.unwrap();
let new_array = DictionaryArray::try_new(filtered_key_array.clone(), self.values().clone())
@@ -269,6 +294,7 @@ impl VectorOp for DictionaryVector {
Ok(Arc::new(Self {
array: new_array,
key_type: self.key_type.clone(),
item_type: self.item_type.clone(),
item_vector: self.item_vector.clone(),
}))
@@ -281,6 +307,7 @@ impl VectorOp for DictionaryVector {
.expect("Failed to create casted dictionary array");
Ok(Arc::new(Self {
array: new_array,
key_type: self.key_type.clone(),
item_type: to_type.clone(),
item_vector: self.item_vector.clone(),
}))
@@ -291,13 +318,17 @@ impl VectorOp for DictionaryVector {
let key_vector = Helper::try_into_vector(&key_array)?;
let new_key_vector = key_vector.take(indices)?;
let new_key_array = new_key_vector.to_arrow_array();
let new_key_array = new_key_array.as_any().downcast_ref::<Int64Array>().unwrap();
let new_key_array = new_key_array
.as_any()
.downcast_ref::<PrimitiveArray<K>>()
.unwrap();
let new_array = DictionaryArray::try_new(new_key_array.clone(), self.values().clone())
.expect("Failed to create filtered dictionary array");
Ok(Arc::new(Self {
array: new_array,
key_type: self.key_type.clone(),
item_type: self.item_type.clone(),
item_vector: self.item_vector.clone(),
}))
@@ -308,19 +339,20 @@ impl VectorOp for DictionaryVector {
mod tests {
use std::sync::Arc;
use arrow_array::StringArray;
use arrow::array::{Int64Array, StringArray, UInt32Array};
use arrow::datatypes::{Int64Type, UInt32Type};
use super::*;
// Helper function to create a test dictionary vector with string values
fn create_test_dictionary() -> DictionaryVector {
fn create_test_dictionary() -> DictionaryVector<Int64Type> {
// Dictionary values: ["a", "b", "c", "d"]
// Keys: [0, 1, 2, null, 1, 3]
// Resulting in: ["a", "b", "c", null, "b", "d"]
let values = StringArray::from(vec!["a", "b", "c", "d"]);
let keys = Int64Array::from(vec![Some(0), Some(1), Some(2), None, Some(1), Some(3)]);
let dict_array = DictionaryArray::new(keys, Arc::new(values));
DictionaryVector::try_from(dict_array).unwrap()
DictionaryVector::<Int64Type>::try_from(dict_array).unwrap()
}
#[test]
@@ -435,4 +467,19 @@ mod tests {
assert_eq!(taken.get(1), Value::String("a".to_string().into()));
assert_eq!(taken.get(2), Value::String("b".to_string().into()));
}
#[test]
fn test_other_type() {
let values = StringArray::from(vec!["a", "b", "c", "d"]);
let keys = UInt32Array::from(vec![Some(0), Some(1), Some(2), None, Some(1), Some(3)]);
let dict_array = DictionaryArray::new(keys, Arc::new(values));
let dict_vec = DictionaryVector::<UInt32Type>::try_from(dict_array).unwrap();
assert_eq!(
ConcreteDataType::dictionary_datatype(
ConcreteDataType::uint32_datatype(),
ConcreteDataType::string_datatype()
),
dict_vec.data_type()
);
}
}
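With the key type now generic, indexing into the item vector goes through `ArrowNativeType::as_usize()` instead of a hard-coded `as usize` on `i64`. A tiny illustration of that conversion, assuming the `arrow` crate is available:

use arrow::datatypes::ArrowNativeType;

fn main() {
    // Both an i64 key and a u8 key convert to a usize index the same way,
    // which is what lets DictionaryVector<K> stay generic over K.
    let wide_key: i64 = 3;
    let narrow_key: u8 = 3;
    assert_eq!(wide_key.as_usize(), 3usize);
    assert_eq!(narrow_key.as_usize(), 3usize);
}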

View File

@@ -20,7 +20,10 @@ use std::sync::Arc;
use arrow::array::{Array, ArrayRef, StringArray};
use arrow::compute;
use arrow::compute::kernels::comparison;
use arrow::datatypes::{DataType as ArrowDataType, Int64Type, TimeUnit};
use arrow::datatypes::{
DataType as ArrowDataType, Int16Type, Int32Type, Int64Type, Int8Type, TimeUnit, UInt16Type,
UInt32Type, UInt64Type, UInt8Type,
};
use arrow_array::{DictionaryArray, StructArray};
use arrow_schema::IntervalUnit;
use datafusion_common::ScalarValue;
@@ -125,6 +128,10 @@ impl Helper {
ScalarValue::Boolean(v) => {
ConstantVector::new(Arc::new(BooleanVector::from(vec![v])), length)
}
ScalarValue::Float16(v) => ConstantVector::new(
Arc::new(Float32Vector::from(vec![v.map(f32::from)])),
length,
),
ScalarValue::Float32(v) => {
ConstantVector::new(Arc::new(Float32Vector::from(vec![v])), length)
}
@@ -240,7 +247,6 @@ impl Helper {
| ScalarValue::LargeList(_)
| ScalarValue::Dictionary(_, _)
| ScalarValue::Union(_, _, _)
| ScalarValue::Float16(_)
| ScalarValue::Utf8View(_)
| ScalarValue::BinaryView(_)
| ScalarValue::Map(_)
@@ -351,16 +357,37 @@ impl Helper {
ArrowDataType::Decimal128(_, _) => {
Arc::new(Decimal128Vector::try_from_arrow_array(array)?)
}
ArrowDataType::Dictionary(key, value) if matches!(&**key, ArrowDataType::Int64) => {
let array = array
.as_ref()
.as_any()
.downcast_ref::<DictionaryArray<Int64Type>>()
.unwrap(); // Safety: the type is guarded by match arm condition
Arc::new(DictionaryVector::new(
array.clone(),
ConcreteDataType::try_from(value.as_ref())?,
)?)
ArrowDataType::Dictionary(key, value) => {
macro_rules! handle_dictionary_key_type {
($key_type:ident) => {{
let array = array
.as_ref()
.as_any()
.downcast_ref::<DictionaryArray<$key_type>>()
.unwrap(); // Safety: the type is guarded by match arm condition
Arc::new(DictionaryVector::new(
array.clone(),
ConcreteDataType::try_from(value.as_ref())?,
)?)
}};
}
match key.as_ref() {
ArrowDataType::Int8 => handle_dictionary_key_type!(Int8Type),
ArrowDataType::Int16 => handle_dictionary_key_type!(Int16Type),
ArrowDataType::Int32 => handle_dictionary_key_type!(Int32Type),
ArrowDataType::Int64 => handle_dictionary_key_type!(Int64Type),
ArrowDataType::UInt8 => handle_dictionary_key_type!(UInt8Type),
ArrowDataType::UInt16 => handle_dictionary_key_type!(UInt16Type),
ArrowDataType::UInt32 => handle_dictionary_key_type!(UInt32Type),
ArrowDataType::UInt64 => handle_dictionary_key_type!(UInt64Type),
_ => {
return error::UnsupportedArrowTypeSnafu {
arrow_type: array.as_ref().data_type().clone(),
}
.fail()
}
}
}
ArrowDataType::Struct(_fields) => {
@@ -375,7 +402,6 @@ impl Helper {
| ArrowDataType::LargeList(_)
| ArrowDataType::FixedSizeList(_, _)
| ArrowDataType::Union(_, _)
| ArrowDataType::Dictionary(_, _)
| ArrowDataType::Decimal256(_, _)
| ArrowDataType::Map(_, _)
| ArrowDataType::RunEndEncoded(_, _)
@@ -629,10 +655,55 @@ mod tests {
check_try_into_vector(Time64MicrosecondArray::from(vec![1, 2, 3]));
check_try_into_vector(Time64NanosecondArray::from(vec![1, 2, 3]));
// Test dictionary arrays with different key types
let values = StringArray::from_iter_values(["a", "b", "c"]);
// Test Int8 keys
let keys = Int8Array::from_iter_values([0, 0, 1, 2]);
let array: ArrayRef =
Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
Helper::try_into_vector(array).unwrap();
// Test Int16 keys
let keys = Int16Array::from_iter_values([0, 0, 1, 2]);
let array: ArrayRef =
Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
Helper::try_into_vector(array).unwrap();
// Test Int32 keys
let keys = Int32Array::from_iter_values([0, 0, 1, 2]);
let array: ArrayRef =
Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
Helper::try_into_vector(array).unwrap();
// Test Int64 keys
let keys = Int64Array::from_iter_values([0, 0, 1, 2]);
let array: ArrayRef =
Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
Helper::try_into_vector(array).unwrap();
// Test UInt8 keys
let keys = UInt8Array::from_iter_values([0, 0, 1, 2]);
let array: ArrayRef =
Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
Helper::try_into_vector(array).unwrap();
// Test UInt16 keys
let keys = UInt16Array::from_iter_values([0, 0, 1, 2]);
let array: ArrayRef =
Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
Helper::try_into_vector(array).unwrap();
// Test UInt32 keys
let keys = UInt32Array::from_iter_values([0, 0, 1, 2]);
let array: ArrayRef =
Arc::new(DictionaryArray::try_new(keys, Arc::new(values.clone())).unwrap());
Helper::try_into_vector(array).unwrap();
// Test UInt64 keys
let keys = UInt64Array::from_iter_values([0, 0, 1, 2]);
let array: ArrayRef = Arc::new(DictionaryArray::try_new(keys, Arc::new(values)).unwrap());
Helper::try_into_vector(array).unwrap_err();
Helper::try_into_vector(array).unwrap();
}
#[test]

View File

@@ -178,6 +178,7 @@ mod tests {
path_type: PathType::Bare,
options: HashMap::default(),
skip_wal_replay: false,
checkpoint: None,
};
let region = FileRegion::open(region_id, request, &object_store)
@@ -230,6 +231,7 @@ mod tests {
path_type: PathType::Bare,
options: HashMap::default(),
skip_wal_replay: false,
checkpoint: None,
};
let err = FileRegion::open(region_id, request, &object_store)
.await

View File

@@ -63,6 +63,7 @@ prost.workspace = true
query.workspace = true
rand.workspace = true
serde.workspace = true
serde_json.workspace = true
servers.workspace = true
session.workspace = true
smallvec.workspace = true
@@ -81,6 +82,5 @@ common-catalog.workspace = true
pretty_assertions.workspace = true
prost.workspace = true
query.workspace = true
serde_json = "1.0"
session.workspace = true
table.workspace = true

View File

@@ -773,6 +773,7 @@ impl StreamingEngine {
create_if_not_exists,
or_replace,
expire_after,
eval_interval: _,
comment,
sql,
flow_options,

View File

@@ -318,6 +318,7 @@ impl FlowDualEngine {
create_if_not_exists: true,
or_replace: true,
expire_after: info.expire_after(),
eval_interval: info.eval_interval(),
comment: Some(info.comment().clone()),
sql: info.raw_sql().clone(),
flow_options: info.options().clone(),
@@ -770,6 +771,7 @@ impl common_meta::node_manager::Flownode for FlowDualEngine {
sink_table_name: Some(sink_table_name),
create_if_not_exists,
expire_after,
eval_interval,
comment,
sql,
flow_options,
@@ -789,6 +791,7 @@ impl common_meta::node_manager::Flownode for FlowDualEngine {
create_if_not_exists,
or_replace,
expire_after,
eval_interval: eval_interval.map(|e| e.seconds),
comment: Some(comment),
sql: sql.clone(),
flow_options,

View File

@@ -16,6 +16,7 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use std::sync::Arc;
use std::time::Duration;
use api::v1::flow::{DirtyWindowRequests, FlowResponse};
use catalog::CatalogManagerRef;
@@ -28,9 +29,15 @@ use common_runtime::JoinHandle;
use common_telemetry::tracing::warn;
use common_telemetry::{debug, info};
use common_time::TimeToLive;
use datafusion_common::tree_node::{TreeNodeRecursion, TreeNodeVisitor};
use datafusion_expr::LogicalPlan;
use datatypes::prelude::ConcreteDataType;
use query::QueryEngineRef;
use session::context::QueryContext;
use snafu::{ensure, OptionExt, ResultExt};
use sql::parsers::utils::is_tql;
use store_api::storage::{RegionId, TableId};
use table::table_reference::TableReference;
use tokio::sync::{oneshot, RwLock};
use crate::batching_mode::frontend_client::FrontendClient;
@@ -40,8 +47,8 @@ use crate::batching_mode::utils::sql_to_df_plan;
use crate::batching_mode::BatchingModeOptions;
use crate::engine::FlowEngine;
use crate::error::{
ExternalSnafu, FlowAlreadyExistSnafu, FlowNotFoundSnafu, TableNotFoundMetaSnafu,
UnexpectedSnafu, UnsupportedSnafu,
CreateFlowSnafu, DatafusionSnafu, ExternalSnafu, FlowAlreadyExistSnafu, FlowNotFoundSnafu,
InvalidQuerySnafu, TableNotFoundMetaSnafu, UnexpectedSnafu, UnsupportedSnafu,
};
use crate::metrics::METRIC_FLOW_BATCHING_ENGINE_BULK_MARK_TIME_WINDOW;
use crate::{CreateFlowArgs, Error, FlowId, TableName};
@@ -149,9 +156,11 @@ impl BatchingEngine {
let handle: JoinHandle<Result<(), Error>> = tokio::spawn(async move {
let src_table_names = &task.config.source_table_names;
let mut all_dirty_windows = HashSet::new();
let mut is_dirty = false;
for src_table_name in src_table_names {
if let Some((timestamps, unit)) = group_by_table_name.get(src_table_name) {
let Some(expr) = &task.config.time_window_expr else {
is_dirty = true;
continue;
};
for timestamp in timestamps {
@@ -166,6 +175,9 @@ impl BatchingEngine {
}
}
let mut state = task.state.write().unwrap();
if is_dirty {
state.dirty_time_windows.set_dirty();
}
let flow_id_label = task.config.flow_id.to_string();
for timestamp in all_dirty_windows {
state.dirty_time_windows.add_window(timestamp, None);
@@ -267,9 +279,12 @@ impl BatchingEngine {
let handle: JoinHandle<Result<(), Error>> = tokio::spawn(async move {
let src_table_names = &task.config.source_table_names;
let mut is_dirty = false;
for src_table_name in src_table_names {
if let Some(entry) = group_by_table_name.get(src_table_name) {
let Some(expr) = &task.config.time_window_expr else {
is_dirty = true;
continue;
};
let involved_time_windows = expr.handle_rows(entry.clone()).await?;
@@ -279,6 +294,10 @@ impl BatchingEngine {
.add_lower_bounds(involved_time_windows.into_iter());
}
}
if is_dirty {
task.state.write().unwrap().dirty_time_windows.set_dirty();
}
Ok(())
});
handles.push(handle);
@@ -335,6 +354,7 @@ impl BatchingEngine {
create_if_not_exists,
or_replace,
expire_after,
eval_interval,
comment: _,
sql,
flow_options,
@@ -361,6 +381,24 @@ impl BatchingEngine {
}
}
let query_ctx = query_ctx.context({
UnexpectedSnafu {
reason: "Query context is None".to_string(),
}
})?;
let query_ctx = Arc::new(query_ctx);
let is_tql = is_tql(query_ctx.sql_dialect(), &sql)
.map_err(BoxedError::new)
.context(CreateFlowSnafu { sql: &sql })?;
// optionally set an eval interval for the flow
if eval_interval.is_none() && is_tql {
InvalidQuerySnafu {
reason: "TQL query requires EVAL INTERVAL to be set".to_string(),
}
.fail()?;
}
let flow_type = flow_options.get(FlowType::FLOW_TYPE_KEY);
ensure!(
@@ -374,13 +412,6 @@ impl BatchingEngine {
}
);
let Some(query_ctx) = query_ctx else {
UnexpectedSnafu {
reason: "Query context is None".to_string(),
}
.fail()?
};
let query_ctx = Arc::new(query_ctx);
let mut source_table_names = Vec::with_capacity(2);
for src_id in source_table_ids {
// also check table option to see if ttl!=instant
@@ -403,6 +434,11 @@ impl BatchingEngine {
let (tx, rx) = oneshot::channel();
let plan = sql_to_df_plan(query_ctx.clone(), self.query_engine.clone(), &sql, true).await?;
if is_tql {
self.check_is_tql_table(&plan, &query_ctx).await?;
}
let (column_name, time_window_expr, _, df_schema) = find_time_window_expr(
&plan,
self.query_engine.engine_state().catalog_manager().clone(),
@@ -442,6 +478,7 @@ impl BatchingEngine {
catalog_manager: self.catalog_manager.clone(),
shutdown_rx: rx,
batch_opts: self.batch_opts.clone(),
flow_eval_interval: eval_interval.map(|secs| Duration::from_secs(secs as u64)),
};
let task = BatchingTask::try_new(task_args)?;
@@ -468,6 +505,131 @@ impl BatchingEngine {
Ok(Some(flow_id))
}
async fn check_is_tql_table(
&self,
query: &LogicalPlan,
query_ctx: &QueryContext,
) -> Result<(), Error> {
struct CollectTableRef {
table_refs: HashSet<datafusion_common::TableReference>,
}
impl TreeNodeVisitor<'_> for CollectTableRef {
type Node = LogicalPlan;
fn f_down(
&mut self,
node: &Self::Node,
) -> datafusion_common::Result<TreeNodeRecursion> {
if let LogicalPlan::TableScan(scan) = node {
self.table_refs.insert(scan.table_name.clone());
}
Ok(TreeNodeRecursion::Continue)
}
}
let mut table_refs = CollectTableRef {
table_refs: HashSet::new(),
};
query
.visit_with_subqueries(&mut table_refs)
.context(DatafusionSnafu {
context: "Checking if all source tables are TQL tables",
})?;
let default_catalog = query_ctx.current_catalog();
let default_schema = query_ctx.current_schema();
let default_schema = &default_schema;
for table_ref in table_refs.table_refs {
let table_ref = match &table_ref {
datafusion_common::TableReference::Bare { table } => {
TableReference::full(default_catalog, default_schema, table)
}
datafusion_common::TableReference::Partial { schema, table } => {
TableReference::full(default_catalog, schema, table)
}
datafusion_common::TableReference::Full {
catalog,
schema,
table,
} => TableReference::full(catalog, schema, table),
};
let table_id = self
.table_meta
.table_name_manager()
.get(table_ref.into())
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
.with_context(|| UnexpectedSnafu {
reason: format!("Failed to get table id for table: {}", table_ref),
})?
.table_id();
let table_info =
get_table_info(self.table_meta.table_info_manager(), &table_id).await?;
// first check that there is only one f64 value column
let value_cols = table_info
.table_info
.meta
.schema
.column_schemas
.iter()
.filter(|col| col.data_type == ConcreteDataType::float64_datatype())
.collect::<Vec<_>>();
ensure!(
value_cols.len() == 1,
InvalidQuerySnafu {
reason: format!(
"TQL query only supports one f64 value column, table `{}`(id={}) has {} f64 value columns, columns are: {:?}",
table_ref,
table_id,
value_cols.len(),
value_cols
),
}
);
// TODO(discord9): do we need to check that the remaining columns are string tag columns?
let pk_idxs = table_info
.table_info
.meta
.primary_key_indices
.iter()
.collect::<HashSet<_>>();
for (idx, col) in table_info
.table_info
.meta
.schema
.column_schemas
.iter()
.enumerate()
{
// three cases:
// 1. val column
// 2. timestamp column
// 3. tag column (string)
let is_pk: bool = pk_idxs.contains(&&idx);
ensure!(
col.data_type == ConcreteDataType::float64_datatype()
|| col.data_type.is_timestamp()
|| (col.data_type == ConcreteDataType::string_datatype() && is_pk),
InvalidQuerySnafu {
reason: format!(
"TQL query only supports f64 value column, timestamp column and string tag columns, table `{}`(id={}) has column `{}` with type {:?} which is not supported",
table_ref,
table_id,
col.name,
col.data_type
),
}
);
}
}
Ok(())
}
pub async fn remove_flow_inner(&self, flow_id: FlowId) -> Result<(), Error> {
if self.tasks.write().await.remove(&flow_id).is_none() {
warn!("Flow {flow_id} not found in tasks");

View File

@@ -41,8 +41,8 @@ use snafu::{OptionExt, ResultExt};
use crate::batching_mode::BatchingModeOptions;
use crate::error::{
ExternalSnafu, InvalidClientConfigSnafu, InvalidRequestSnafu, NoAvailableFrontendSnafu,
UnexpectedSnafu,
CreateSinkTableSnafu, ExternalSnafu, InvalidClientConfigSnafu, InvalidRequestSnafu,
NoAvailableFrontendSnafu, UnexpectedSnafu,
};
use crate::{Error, FlowAuthHeader};
@@ -290,13 +290,17 @@ impl FrontendClient {
) -> Result<u32, Error> {
self.handle(
Request::Ddl(api::v1::DdlRequest {
expr: Some(api::v1::ddl_request::Expr::CreateTable(create)),
expr: Some(api::v1::ddl_request::Expr::CreateTable(create.clone())),
}),
catalog,
schema,
&mut None,
)
.await
.map_err(BoxedError::new)
.with_context(|_| CreateSinkTableSnafu {
create: create.clone(),
})
}
/// Execute a SQL statement on the frontend.

View File

@@ -203,11 +203,21 @@ impl DirtyTimeWindows {
self.windows.clear();
}
/// Sets the windows to dirty. Only useful for full aggregations without a time window,
/// to mark that new data has been inserted.
pub fn set_dirty(&mut self) {
self.windows.insert(Timestamp::new_second(0), None);
}
/// Number of dirty windows.
pub fn len(&self) -> usize {
self.windows.len()
}
pub fn is_empty(&self) -> bool {
self.windows.is_empty()
}
/// Get the effective count of time windows, which is the number of time windows that can be
/// used for query, computed from the total time window range divided by `window_size`.
pub fn effective_count(&self, window_size: &Duration) -> usize {
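`set_dirty` reuses the window map itself as the dirty flag by inserting a sentinel window at the epoch. A minimal standalone version of the idea, with a `BTreeMap` of epoch seconds standing in for the timestamp map:

use std::collections::BTreeMap;

// Seconds-since-epoch keys stand in for `Timestamp`; the value is an
// optional window end, mirroring the windows map in DirtyTimeWindows.
#[derive(Default)]
struct DirtyWindows {
    windows: BTreeMap<i64, Option<i64>>,
}

impl DirtyWindows {
    // Full aggregations without a time window have no real window to mark,
    // so a sentinel at t=0 records "something changed, recompute everything".
    fn set_dirty(&mut self) {
        self.windows.insert(0, None);
    }

    fn is_empty(&self) -> bool {
        self.windows.is_empty()
    }
}

fn main() {
    let mut w = DirtyWindows::default();
    assert!(w.is_empty());
    w.set_dirty();
    assert!(!w.is_empty());
}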

View File

@@ -17,7 +17,6 @@ use std::sync::{Arc, RwLock};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use api::v1::CreateTableExpr;
use arrow_schema::Fields;
use catalog::CatalogManagerRef;
use common_error::ext::BoxedError;
use common_query::logical_plan::breakup_insert_plan;
@@ -49,8 +48,8 @@ use crate::batching_mode::frontend_client::FrontendClient;
use crate::batching_mode::state::{FilterExprInfo, TaskState};
use crate::batching_mode::time_window::TimeWindowExpr;
use crate::batching_mode::utils::{
get_table_info_df_schema, sql_to_df_plan, AddAutoColumnRewriter, AddFilterRewriter,
FindGroupByFinalName,
gen_plan_with_matching_schema, get_table_info_df_schema, sql_to_df_plan, AddFilterRewriter,
ColumnMatcherRewriter, FindGroupByFinalName,
};
use crate::batching_mode::BatchingModeOptions;
use crate::df_optimizer::apply_df_optimizer;
@@ -75,11 +74,12 @@ pub struct TaskConfig {
pub time_window_expr: Option<TimeWindowExpr>,
/// in seconds
pub expire_after: Option<i64>,
sink_table_name: [String; 3],
pub sink_table_name: [String; 3],
pub source_table_names: HashSet<[String; 3]>,
catalog_manager: CatalogManagerRef,
query_type: QueryType,
batch_opts: Arc<BatchingModeOptions>,
pub catalog_manager: CatalogManagerRef,
pub query_type: QueryType,
pub batch_opts: Arc<BatchingModeOptions>,
pub flow_eval_interval: Option<Duration>,
}
fn determine_query_type(query: &str, query_ctx: &QueryContextRef) -> Result<QueryType, Error> {
@@ -101,8 +101,8 @@ fn determine_query_type(query: &str, query_ctx: &QueryContextRef) -> Result<Quer
}
}
#[derive(Debug, Clone)]
enum QueryType {
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum QueryType {
/// query is a tql query
Tql,
/// query is a sql query
@@ -128,6 +128,7 @@ pub struct TaskArgs<'a> {
pub catalog_manager: CatalogManagerRef,
pub shutdown_rx: oneshot::Receiver<()>,
pub batch_opts: Arc<BatchingModeOptions>,
pub flow_eval_interval: Option<Duration>,
}
pub struct PlanInfo {
@@ -150,6 +151,7 @@ impl BatchingTask {
catalog_manager,
shutdown_rx,
batch_opts,
flow_eval_interval,
}: TaskArgs<'_>,
) -> Result<Self, Error> {
Ok(Self {
@@ -164,6 +166,7 @@ impl BatchingTask {
output_schema: plan.schema().clone(),
query_type: determine_query_type(query, &query_ctx)?,
batch_opts,
flow_eval_interval,
}),
state: Arc::new(RwLock::new(TaskState::new(query_ctx, shutdown_rx))),
})
@@ -452,6 +455,13 @@ impl BatchingTask {
) {
let flow_id_str = self.config.flow_id.to_string();
let mut max_window_cnt = None;
let mut interval = self
.config
.flow_eval_interval
.map(|d| tokio::time::interval(d));
if let Some(tick) = &mut interval {
tick.tick().await; // consume the first tick, which completes immediately
}
loop {
// first check if shutdown signal is received
// if so, break the loop
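For reference, a standalone check of the ticking behavior relied on here, assuming tokio with the `rt`, `time` and `macros` features: a tokio interval completes its first tick immediately, so consuming it up front makes the task wait one full interval before its first scheduled run.

use std::time::Duration;

#[tokio::main]
async fn main() {
    let start = std::time::Instant::now();
    let mut tick = tokio::time::interval(Duration::from_secs(60));
    // The first tick resolves right away rather than after 60 seconds.
    tick.tick().await;
    assert!(start.elapsed() < Duration::from_secs(1));
}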
@@ -499,24 +509,33 @@ impl BatchingTask {
max_window_cnt = max_window_cnt.map(|cnt| {
(cnt + 1).min(self.config.batch_opts.experimental_max_filter_num_per_query)
});
let sleep_until = {
let state = self.state.write().unwrap();
let time_window_size = self
.config
.time_window_expr
.as_ref()
.and_then(|t| *t.time_window_size());
// use interval-based ticking if an eval interval is set
if let Some(eval_interval) = &mut interval {
eval_interval.tick().await;
} else {
// if no interval is explicitly set, compute the next start time
// from the time window size and related options
let sleep_until = {
let state = self.state.write().unwrap();
state.get_next_start_query_time(
self.config.flow_id,
&time_window_size,
min_refresh,
Some(self.config.batch_opts.query_timeout),
self.config.batch_opts.experimental_max_filter_num_per_query,
)
let time_window_size = self
.config
.time_window_expr
.as_ref()
.and_then(|t| *t.time_window_size());
state.get_next_start_query_time(
self.config.flow_id,
&time_window_size,
min_refresh,
Some(self.config.batch_opts.query_timeout),
self.config.batch_opts.experimental_max_filter_num_per_query,
)
};
tokio::time::sleep_until(sleep_until).await;
};
tokio::time::sleep_until(sleep_until).await;
}
// no new data, sleep for some time before checking for new data
Ok(None) => {
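Note on the fixed eval-interval branch above: tokio's `Interval` completes its first `tick()` immediately, which is why the task consumes one tick up front before entering the loop. A minimal standalone sketch of that behaviour, assuming tokio with the `time`, `rt-multi-thread` and `macros` features; the 2-second period and the printouts are illustrative only, not part of the flow code:

use std::time::Duration;

#[tokio::main]
async fn main() {
    let mut eval_interval = tokio::time::interval(Duration::from_secs(2));
    // A fresh Interval yields its first tick right away; consume it so the
    // loop below only fires after full periods, mirroring the branch above.
    eval_interval.tick().await;
    for round in 0..3 {
        eval_interval.tick().await; // waits roughly one full period per iteration
        println!("evaluation round {round}");
    }
}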
@@ -569,7 +588,7 @@ impl BatchingTask {
let query_ctx = self.state.read().unwrap().query_ctx.clone();
let plan =
sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.config.query, true).await?;
create_table_with_expr(&plan, &self.config.sink_table_name)
create_table_with_expr(&plan, &self.config.sink_table_name, &self.config.query_type)
}
/// will merge and use the first ten time windows in the query
@@ -599,42 +618,63 @@ impl BatchingTask {
.map(|expr| expr.eval(low_bound))
.transpose()?;
let (Some((Some(l), Some(u))), QueryType::Sql) =
(expire_time_window_bound, &self.config.query_type)
else {
// either no time window or not a sql query, then just use the original query
// use sink_table_meta to add to query the `update_at` and `__ts_placeholder` column's value too for compatibility reason
debug!(
"Flow id = {:?}, can't get window size: precise_lower_bound={expire_time_window_bound:?}, using the same query", self.config.flow_id
);
// clean dirty time window too, this could be from create flow's check_execute
self.state.write().unwrap().dirty_time_windows.clean();
let (expire_lower_bound, expire_upper_bound) =
match (expire_time_window_bound, &self.config.query_type) {
(Some((Some(l), Some(u))), QueryType::Sql) => (l, u),
(None, QueryType::Sql) => {
// if it's a SQL query and no time window lower/upper bound is found, just return the original query (with auto columns)
// use sink_table_meta to also add the `update_at` and `__ts_placeholder` columns' values to the query, for compatibility reasons
debug!(
"Flow id = {:?}, no time window, using the same query",
self.config.flow_id
);
// clean dirty time window too, this could be from create flow's check_execute
let is_dirty = !self.state.read().unwrap().dirty_time_windows.is_empty();
self.state.write().unwrap().dirty_time_windows.clean();
// TODO(discord9): don't add auto columns for TQL queries?
let mut add_auto_column = AddAutoColumnRewriter::new(sink_table_schema.clone());
if !is_dirty {
// no dirty data, hence no need to update
debug!("Flow id={:?}, no new data, not update", self.config.flow_id);
return Ok(None);
}
let plan = sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.config.query, false)
.await?;
let plan = gen_plan_with_matching_schema(
&self.config.query,
query_ctx,
engine,
sink_table_schema.clone(),
)
.await?;
let plan = plan
.clone()
.rewrite(&mut add_auto_column)
.with_context(|_| DatafusionSnafu {
context: format!("Failed to rewrite plan:\n {}\n", plan),
})?
.data;
return Ok(Some(PlanInfo { plan, filter: None }));
}
_ => {
// clean up, since TQL queries have no use for time windows
self.state.write().unwrap().dirty_time_windows.clean();
// since no time window lower/upper bound is found, just return the original query (with auto columns)
return Ok(Some(PlanInfo { plan, filter: None }));
};
let plan = gen_plan_with_matching_schema(
&self.config.query,
query_ctx,
engine,
sink_table_schema.clone(),
)
.await?;
return Ok(Some(PlanInfo { plan, filter: None }));
}
};
debug!(
"Flow id = {:?}, found time window: precise_lower_bound={:?}, precise_upper_bound={:?} with dirty time windows: {:?}",
self.config.flow_id, l, u, self.state.read().unwrap().dirty_time_windows
self.config.flow_id, expire_lower_bound, expire_upper_bound, self.state.read().unwrap().dirty_time_windows
);
let window_size = u.sub(&l).with_context(|| UnexpectedSnafu {
reason: format!("Can't get window size from {u:?} - {l:?}"),
})?;
let window_size = expire_upper_bound
.sub(&expire_lower_bound)
.with_context(|| UnexpectedSnafu {
reason: format!(
"Can't get window size from {expire_upper_bound:?} - {expire_lower_bound:?}"
),
})?;
let col_name = self
.config
.time_window_expr
@@ -654,7 +694,7 @@ impl BatchingTask {
.dirty_time_windows
.gen_filter_exprs(
&col_name,
Some(l),
Some(expire_lower_bound),
window_size,
max_window_cnt
.unwrap_or(self.config.batch_opts.experimental_max_filter_num_per_query),
@@ -682,7 +722,7 @@ impl BatchingTask {
};
let mut add_filter = AddFilterRewriter::new(expr.expr.clone());
let mut add_auto_column = AddAutoColumnRewriter::new(sink_table_schema.clone());
let mut add_auto_column = ColumnMatcherRewriter::new(sink_table_schema.clone());
let plan =
sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.config.query, false).await?;
@@ -695,7 +735,7 @@ impl BatchingTask {
})?
.data;
// only apply optimize after complex rewrite is done
let new_plan = apply_df_optimizer(rewrite).await?;
let new_plan = apply_df_optimizer(rewrite, &query_ctx).await?;
let info = PlanInfo {
plan: new_plan.clone(),
@@ -711,12 +751,30 @@ impl BatchingTask {
fn create_table_with_expr(
plan: &LogicalPlan,
sink_table_name: &[String; 3],
query_type: &QueryType,
) -> Result<CreateTableExpr, Error> {
let fields = plan.schema().fields();
let (first_time_stamp, primary_keys) = build_primary_key_constraint(plan, fields)?;
let table_def = match query_type {
&QueryType::Sql => {
if let Some(def) = build_pk_from_aggr(plan)? {
def
} else {
build_by_sql_schema(plan)?
}
}
QueryType::Tql => {
// first try build from aggr, then from tql schema because tql query might not have aggr node
if let Some(table_def) = build_pk_from_aggr(plan)? {
table_def
} else {
build_by_tql_schema(plan)?
}
}
};
let first_time_stamp = table_def.ts_col;
let primary_keys = table_def.pks;
let mut column_schemas = Vec::new();
for field in fields {
for field in plan.schema().fields() {
let name = field.name();
let ty = ConcreteDataType::from_arrow_type(field.data_type());
let col_schema = if first_time_stamp == Some(name.clone()) {
@@ -724,15 +782,40 @@ fn create_table_with_expr(
} else {
ColumnSchema::new(name, ty, true)
};
column_schemas.push(col_schema);
match query_type {
QueryType::Sql => {
column_schemas.push(col_schema);
}
QueryType::Tql => {
// if it is a value column, it needs to be typed as DOUBLE NULL
// if it is a tag column, it needs to be cast to STRING NULL
let is_tag_column = primary_keys.contains(name);
let is_val_column = !is_tag_column && first_time_stamp.as_ref() != Some(name);
if is_val_column {
let col_schema =
ColumnSchema::new(name, ConcreteDataType::float64_datatype(), true);
column_schemas.push(col_schema);
} else if is_tag_column {
let col_schema =
ColumnSchema::new(name, ConcreteDataType::string_datatype(), true);
column_schemas.push(col_schema);
} else {
// time index column
column_schemas.push(col_schema);
}
}
}
}
let update_at_schema = ColumnSchema::new(
AUTO_CREATED_UPDATE_AT_TS_COL,
ConcreteDataType::timestamp_millisecond_datatype(),
true,
);
column_schemas.push(update_at_schema);
if query_type == &QueryType::Sql {
let update_at_schema = ColumnSchema::new(
AUTO_CREATED_UPDATE_AT_TS_COL,
ConcreteDataType::timestamp_millisecond_datatype(),
true,
);
column_schemas.push(update_at_schema);
}
let time_index = if let Some(time_index) = first_time_stamp {
time_index
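For TQL flows, the branch above shapes the auto-created sink table like a Prometheus-style table: tag (primary-key) columns become STRING NULL, the time-index column keeps its timestamp type, and every remaining column becomes a DOUBLE NULL value column. A small hypothetical helper, not part of the diff, that restates the same rule over illustrative column names:

/// Hypothetical restatement of the TQL branch's column typing rule.
fn tql_column_kind(name: &str, pks: &[&str], ts_col: Option<&str>) -> &'static str {
    if ts_col == Some(name) {
        "time index (timestamp, kept as-is)"
    } else if pks.contains(&name) {
        "tag -> STRING NULL"
    } else {
        "value -> DOUBLE NULL"
    }
}

fn main() {
    for col in ["host", "ts", "cpu_usage"] {
        println!("{col}: {}", tql_column_kind(col, &["host"], Some("ts")));
    }
}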
@@ -765,16 +848,64 @@ fn create_table_with_expr(
})
}
/// Simply build from the schema: return the first timestamp column and no primary keys
fn build_by_sql_schema(plan: &LogicalPlan) -> Result<TableDef, Error> {
let first_time_stamp = plan.schema().fields().iter().find_map(|f| {
if ConcreteDataType::from_arrow_type(f.data_type()).is_timestamp() {
Some(f.name().clone())
} else {
None
}
});
Ok(TableDef {
ts_col: first_time_stamp,
pks: vec![],
})
}
/// Return the first timestamp column found in the output schema and all string columns
fn build_by_tql_schema(plan: &LogicalPlan) -> Result<TableDef, Error> {
let first_time_stamp = plan.schema().fields().iter().find_map(|f| {
if ConcreteDataType::from_arrow_type(f.data_type()).is_timestamp() {
Some(f.name().clone())
} else {
None
}
});
let string_columns = plan
.schema()
.fields()
.iter()
.filter_map(|f| {
if ConcreteDataType::from_arrow_type(f.data_type()).is_string() {
Some(f.name().clone())
} else {
None
}
})
.collect::<Vec<_>>();
Ok(TableDef {
ts_col: first_time_stamp,
pks: string_columns,
})
}
struct TableDef {
ts_col: Option<String>,
pks: Vec<String>,
}
/// Return first timestamp column which is in group by clause and other columns which are also in group by clause
///
/// # Returns
///
/// * `Option<String>` - first timestamp column which is in group by clause
/// * `Vec<String>` - other columns which are also in group by clause
fn build_primary_key_constraint(
plan: &LogicalPlan,
schema: &Fields,
) -> Result<(Option<String>, Vec<String>), Error> {
///
/// if no aggregation is found, return None
fn build_pk_from_aggr(plan: &LogicalPlan) -> Result<Option<TableDef>, Error> {
let fields = plan.schema().fields();
let mut pk_names = FindGroupByFinalName::default();
plan.visit(&mut pk_names)
@@ -782,19 +913,28 @@ fn build_primary_key_constraint(
context: format!("Can't find aggr expr in plan {plan:?}"),
})?;
// if no group by clause, return empty
let pk_final_names = pk_names.get_group_expr_names().unwrap_or_default();
// if the group by clause is empty, return no primary keys and use the first timestamp column found in the output schema
let Some(pk_final_names) = pk_names.get_group_expr_names() else {
return Ok(None);
};
if pk_final_names.is_empty() {
return Ok((None, Vec::new()));
let first_ts_col = fields
.iter()
.find(|f| ConcreteDataType::from_arrow_type(f.data_type()).is_timestamp())
.map(|f| f.name().clone());
return Ok(Some(TableDef {
ts_col: first_ts_col,
pks: vec![],
}));
}
let all_pk_cols: Vec<_> = schema
let all_pk_cols: Vec<_> = fields
.iter()
.filter(|f| pk_final_names.contains(f.name()))
.map(|f| f.name().clone())
.collect();
// the auto-created table uses the first timestamp column in the group by clause as its time index
let first_time_stamp = schema
let first_time_stamp = fields
.iter()
.find(|f| {
all_pk_cols.contains(&f.name().clone())
@@ -807,7 +947,10 @@ fn build_primary_key_constraint(
.filter(|col| first_time_stamp != Some(col.to_string()))
.collect();
Ok((first_time_stamp, all_pk_cols))
Ok(Some(TableDef {
ts_col: first_time_stamp,
pks: all_pk_cols,
}))
}
#[cfg(test)]
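In short, `build_pk_from_aggr` returns `None` when the plan has no aggregation; otherwise the first timestamp-typed GROUP BY column becomes the time index and the remaining GROUP BY columns become primary keys. A standalone sketch of that selection rule, with made-up column names and a simplified `(name, is_timestamp)` encoding rather than the real schema types:

/// Illustrative version of the split: `cols` are the GROUP BY output columns
/// paired with whether they are timestamp-typed.
fn split_group_by(cols: &[(&str, bool)]) -> (Option<String>, Vec<String>) {
    let ts_col = cols
        .iter()
        .find(|(_, is_ts)| *is_ts)
        .map(|(name, _)| name.to_string());
    let pks = cols
        .iter()
        .map(|(name, _)| name.to_string())
        .filter(|name| Some(name) != ts_col.as_ref())
        .collect();
    (ts_col, pks)
}

fn main() {
    // e.g. `SELECT host, ts, count(*) FROM t GROUP BY host, ts`
    let (ts, pks) = split_group_by(&[("host", false), ("ts", true)]);
    assert_eq!(ts.as_deref(), Some("ts"));
    assert_eq!(pks, vec!["host".to_string()]);
    println!("time index = {ts:?}, primary keys = {pks:?}");
}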
@@ -853,13 +996,13 @@ mod test {
ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
true,
),
false,
)
.with_time_index(true),
update_at_schema.clone(),
ts_placeholder_schema.clone(),
],
primary_keys: vec![],
time_index: AUTO_CREATED_PLACEHOLDER_TS_COL.to_string(),
time_index: "ts".to_string(),
},
TestCase {
sql: "SELECT number, max(ts) FROM numbers_with_ts GROUP BY number".to_string(),
@@ -926,6 +1069,7 @@ mod test {
"public".to_string(),
tc.sink_table_name.clone(),
],
&QueryType::Sql,
)
.unwrap();
// TODO(discord9): assert expr
@@ -934,9 +1078,9 @@ mod test {
.iter()
.map(|c| try_as_column_schema(c).unwrap())
.collect::<Vec<_>>();
assert_eq!(tc.column_schemas, column_schemas);
assert_eq!(tc.primary_keys, expr.primary_keys);
assert_eq!(tc.time_index, expr.time_index);
assert_eq!(tc.column_schemas, column_schemas, "{:?}", tc.sql);
assert_eq!(tc.primary_keys, expr.primary_keys, "{:?}", tc.sql);
assert_eq!(tc.time_index, expr.time_index, "{:?}", tc.sql);
}
}
}

View File

@@ -24,7 +24,7 @@ use datafusion::error::Result as DfResult;
use datafusion::logical_expr::Expr;
use datafusion::sql::unparser::Unparser;
use datafusion_common::tree_node::{
Transformed, TreeNodeRecursion, TreeNodeRewriter, TreeNodeVisitor,
Transformed, TreeNode as _, TreeNodeRecursion, TreeNodeRewriter, TreeNodeVisitor,
};
use datafusion_common::{DFSchema, DataFusionError, ScalarValue};
use datafusion_expr::{Distinct, LogicalPlan, Projection};
@@ -122,19 +122,40 @@ pub async fn sql_to_df_plan(
};
let plan = engine
.planner()
.plan(&query_stmt, query_ctx)
.plan(&query_stmt, query_ctx.clone())
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let plan = if optimize {
apply_df_optimizer(plan).await?
apply_df_optimizer(plan, &query_ctx).await?
} else {
plan
};
Ok(plan)
}
/// Generate a plan that matches the schema of the sink table
/// from the given SQL by aliasing columns and adding auto columns
pub(crate) async fn gen_plan_with_matching_schema(
sql: &str,
query_ctx: QueryContextRef,
engine: QueryEngineRef,
sink_table_schema: SchemaRef,
) -> Result<LogicalPlan, Error> {
let plan = sql_to_df_plan(query_ctx.clone(), engine.clone(), sql, false).await?;
let mut add_auto_column = ColumnMatcherRewriter::new(sink_table_schema);
let plan = plan
.clone()
.rewrite(&mut add_auto_column)
.with_context(|_| DatafusionSnafu {
context: format!("Failed to rewrite plan:\n {}\n", plan),
})?
.data;
Ok(plan)
}
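`gen_plan_with_matching_schema` above is a thin wrapper over DataFusion's `TreeNode::rewrite` driven by the `ColumnMatcherRewriter` defined later in this file. A self-contained sketch of the same rewrite pattern against a toy plan, assuming the `datafusion-expr` and `datafusion-common` crates; the `CountingRewriter` and the VALUES plan are invented for illustration, and only the `plan.rewrite(&mut rewriter)?.data` call shape comes from the code above:

use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
use datafusion_common::Result;
use datafusion_expr::{col, lit, LogicalPlan, LogicalPlanBuilder};

/// Toy rewriter in the same shape as `ColumnMatcherRewriter`: it only inspects
/// nodes on the way down and leaves the plan unchanged.
struct CountingRewriter {
    projections_seen: usize,
}

impl TreeNodeRewriter for CountingRewriter {
    type Node = LogicalPlan;

    fn f_down(&mut self, node: LogicalPlan) -> Result<Transformed<LogicalPlan>> {
        if matches!(&node, LogicalPlan::Projection(_)) {
            self.projections_seen += 1;
        }
        Ok(Transformed::no(node))
    }
}

fn main() -> Result<()> {
    // Build a tiny plan (VALUES -> Projection), then apply the rewriter the
    // same way the flow code does: `plan.rewrite(&mut rewriter)?.data`.
    let plan = LogicalPlanBuilder::values(vec![vec![lit(1i64)]])?
        .project(vec![col("column1")])?
        .build()?;

    let mut rewriter = CountingRewriter { projections_seen: 0 };
    let rewritten = plan.rewrite(&mut rewriter)?.data;

    println!("projections seen: {}", rewriter.projections_seen);
    println!("{rewritten}");
    Ok(())
}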
pub fn df_plan_to_sql(plan: &LogicalPlan) -> Result<String, Error> {
/// A dialect that forces identifiers to be quoted when they contain uppercase characters
struct ForceQuoteIdentifiers;
@@ -239,75 +260,28 @@ impl TreeNodeVisitor<'_> for FindGroupByFinalName {
}
}
/// Add to the final select columns like `update_at`
/// Optionally add columns like `update_at` to the final select if the sink table has such a column
/// (which doesn't necessarily need to have that exact name; it just needs to be an extra timestamp column)
/// and `__ts_placeholder` (this column must have exactly this name and be a timestamp),
/// with values like `now()` and `0`.
///
/// It also aliases existing columns to the corresponding columns in the sink table if needed.
#[derive(Debug)]
pub struct AddAutoColumnRewriter {
pub struct ColumnMatcherRewriter {
pub schema: SchemaRef,
pub is_rewritten: bool,
}
impl AddAutoColumnRewriter {
impl ColumnMatcherRewriter {
pub fn new(schema: SchemaRef) -> Self {
Self {
schema,
is_rewritten: false,
}
}
}
impl TreeNodeRewriter for AddAutoColumnRewriter {
type Node = LogicalPlan;
fn f_down(&mut self, mut node: Self::Node) -> DfResult<Transformed<Self::Node>> {
if self.is_rewritten {
return Ok(Transformed::no(node));
}
// if is distinct all, wrap it in a projection
if let LogicalPlan::Distinct(Distinct::All(_)) = &node {
let mut exprs = vec![];
for field in node.schema().fields().iter() {
exprs.push(Expr::Column(datafusion::common::Column::new_unqualified(
field.name(),
)));
}
let projection =
LogicalPlan::Projection(Projection::try_new(exprs, Arc::new(node.clone()))?);
node = projection;
}
// handle table_scan by wrap it in a projection
else if let LogicalPlan::TableScan(table_scan) = node {
let mut exprs = vec![];
for field in table_scan.projected_schema.fields().iter() {
exprs.push(Expr::Column(datafusion::common::Column::new(
Some(table_scan.table_name.clone()),
field.name(),
)));
}
let projection = LogicalPlan::Projection(Projection::try_new(
exprs,
Arc::new(LogicalPlan::TableScan(table_scan)),
)?);
node = projection;
}
// only do rewrite if found the outermost projection
let mut exprs = if let LogicalPlan::Projection(project) = &node {
project.expr.clone()
} else {
return Ok(Transformed::no(node));
};
/// modify the exprs so that they match the sink table schema, adding auto columns where needed
fn modify_project_exprs(&mut self, mut exprs: Vec<Expr>) -> DfResult<Vec<Expr>> {
let all_names = self
.schema
.column_schemas()
@@ -391,10 +365,76 @@ impl TreeNodeRewriter for AddAutoColumnRewriter {
query_col_cnt, exprs, table_col_cnt, self.schema.column_schemas()
)));
}
Ok(exprs)
}
}
self.is_rewritten = true;
let new_plan = node.with_new_exprs(exprs, node.inputs().into_iter().cloned().collect())?;
Ok(Transformed::yes(new_plan))
impl TreeNodeRewriter for ColumnMatcherRewriter {
type Node = LogicalPlan;
fn f_down(&mut self, mut node: Self::Node) -> DfResult<Transformed<Self::Node>> {
if self.is_rewritten {
return Ok(Transformed::no(node));
}
// if is distinct all, wrap it in a projection
if let LogicalPlan::Distinct(Distinct::All(_)) = &node {
let mut exprs = vec![];
for field in node.schema().fields().iter() {
exprs.push(Expr::Column(datafusion::common::Column::new_unqualified(
field.name(),
)));
}
let projection =
LogicalPlan::Projection(Projection::try_new(exprs, Arc::new(node.clone()))?);
node = projection;
}
// handle table_scan by wrapping it in a projection
else if let LogicalPlan::TableScan(table_scan) = node {
let mut exprs = vec![];
for field in table_scan.projected_schema.fields().iter() {
exprs.push(Expr::Column(datafusion::common::Column::new(
Some(table_scan.table_name.clone()),
field.name(),
)));
}
let projection = LogicalPlan::Projection(Projection::try_new(
exprs,
Arc::new(LogicalPlan::TableScan(table_scan)),
)?);
node = projection;
}
// only rewrite once the outermost projection is found:
// if the outermost node is a projection, rewrite its exprs directly;
// if not, wrap the plan in a projection first
if let LogicalPlan::Projection(project) = &node {
let exprs = project.expr.clone();
let exprs = self.modify_project_exprs(exprs)?;
self.is_rewritten = true;
let new_plan =
node.with_new_exprs(exprs, node.inputs().into_iter().cloned().collect())?;
Ok(Transformed::yes(new_plan))
} else {
// wrap the logical plan in a projection
let mut exprs = vec![];
for field in node.schema().fields().iter() {
exprs.push(Expr::Column(datafusion::common::Column::new_unqualified(
field.name(),
)));
}
let exprs = self.modify_project_exprs(exprs)?;
self.is_rewritten = true;
let new_plan =
LogicalPlan::Projection(Projection::try_new(exprs, Arc::new(node.clone()))?);
Ok(Transformed::yes(new_plan))
}
}
/// We might add new columns, so we need to recompute the schema
@@ -677,7 +717,7 @@ mod test {
let ctx = QueryContext::arc();
for (before, after, column_schemas) in testcases {
let schema = Arc::new(Schema::new(column_schemas));
let mut add_auto_column_rewriter = AddAutoColumnRewriter::new(schema);
let mut add_auto_column_rewriter = ColumnMatcherRewriter::new(schema);
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), before, false)
.await

View File

@@ -44,6 +44,7 @@ use query::optimizer::count_wildcard::CountWildcardToTimeIndexRule;
use query::parser::QueryLanguageParser;
use query::query_engine::DefaultSerializer;
use query::QueryEngine;
use session::context::QueryContextRef;
use snafu::ResultExt;
/// note here we are using the `substrait_proto_df` crate from the `substrait` module and
/// renaming it to `substrait_proto`
@@ -57,8 +58,9 @@ use crate::plan::TypedPlan;
// TODO(discord9): use `Analyzer` to manage rules if more `AnalyzerRule` is needed
pub async fn apply_df_optimizer(
plan: datafusion_expr::LogicalPlan,
query_ctx: &QueryContextRef,
) -> Result<datafusion_expr::LogicalPlan, Error> {
let cfg = ConfigOptions::new();
let cfg = query_ctx.create_config_options();
let analyzer = Analyzer::with_rules(vec![
Arc::new(CountWildcardToTimeIndexRule),
Arc::new(AvgExpandRule),
@@ -107,12 +109,12 @@ pub async fn sql_to_flow_plan(
.context(ExternalSnafu)?;
let plan = engine
.planner()
.plan(&stmt, query_ctx)
.plan(&stmt, query_ctx.clone())
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let opted_plan = apply_df_optimizer(plan).await?;
let opted_plan = apply_df_optimizer(plan, &query_ctx).await?;
// TODO(discord9): add df optimization
let sub_plan = DFLogicalSubstraitConvertor {}

View File

@@ -70,6 +70,7 @@ pub struct CreateFlowArgs {
pub create_if_not_exists: bool,
pub or_replace: bool,
pub expire_after: Option<i64>,
pub eval_interval: Option<i64>,
pub comment: Option<String>,
pub sql: String,
pub flow_options: HashMap<String, String>,

View File

@@ -16,6 +16,7 @@
use std::any::Any;
use api::v1::CreateTableExpr;
use arrow_schema::ArrowError;
use common_error::ext::BoxedError;
use common_error::{define_into_tonic_status, from_err_code_msg_to_header};
@@ -60,6 +61,14 @@ pub enum Error {
location: Location,
},
#[snafu(display("Error encountered while creating sink table for flow: {create:?}"))]
CreateSinkTable {
create: CreateTableExpr,
source: BoxedError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Time error"))]
Time {
source: common_time::error::Error,
@@ -331,9 +340,10 @@ impl ErrorExt for Error {
| Self::ListFlows { .. } => StatusCode::TableNotFound,
Self::FlowNotFound { .. } => StatusCode::FlowNotFound,
Self::Plan { .. } | Self::Datatypes { .. } => StatusCode::PlanQuery,
Self::CreateFlow { .. } | Self::Arrow { .. } | Self::Time { .. } => {
StatusCode::EngineExecuteQuery
}
Self::CreateFlow { .. }
| Self::CreateSinkTable { .. }
| Self::Arrow { .. }
| Self::Time { .. } => StatusCode::EngineExecuteQuery,
Self::Unexpected { .. }
| Self::SyncCheckTask { .. }
| Self::IllegalCheckTaskState { .. } => StatusCode::Unexpected,

View File

@@ -218,6 +218,7 @@ impl HeartbeatTask {
if let Some(message) = message {
Self::new_heartbeat_request(&heartbeat_request, Some(message), &latest_report)
} else {
warn!("Sender has been dropped, exiting the heartbeat loop");
// Receiving None means the Sender was dropped, so break out of the current loop
break
}
@@ -259,7 +260,11 @@ impl HeartbeatTask {
error!(e; "Error while handling heartbeat response");
}
}
Ok(None) => break,
Ok(None) => {
warn!("Heartbeat response stream closed");
capture_self.start_with_retry(retry_interval).await;
break;
}
Err(e) => {
error!(e; "Occur error while reading heartbeat response");
capture_self.start_with_retry(retry_interval).await;

View File

@@ -172,7 +172,9 @@ pub async fn sql_to_substrait(engine: Arc<dyn QueryEngine>, sql: &str) -> proto:
.plan(&stmt, QueryContext::arc())
.await
.unwrap();
let plan = apply_df_optimizer(plan).await.unwrap();
let plan = apply_df_optimizer(plan, &QueryContext::arc())
.await
.unwrap();
// encode then decode so to rely on the impl of conversion from logical plan to substrait plan
let bytes = DFLogicalSubstraitConvertor {}

View File

@@ -293,7 +293,9 @@ mod test {
.plan(&stmt, QueryContext::arc())
.await
.unwrap();
let plan = apply_df_optimizer(plan).await.unwrap();
let plan = apply_df_optimizer(plan, &QueryContext::arc())
.await
.unwrap();
// encode then decode so to rely on the impl of conversion from logical plan to substrait plan
let bytes = DFLogicalSubstraitConvertor {}
@@ -315,7 +317,7 @@ mod test {
.plan(&stmt, QueryContext::arc())
.await
.unwrap();
let plan = apply_df_optimizer(plan).await;
let plan = apply_df_optimizer(plan, &QueryContext::arc()).await;
assert!(plan.is_err());
}

View File

@@ -337,12 +337,6 @@ pub enum Error {
source: BoxedError,
},
#[snafu(display("In-flight write bytes exceeded the maximum limit"))]
InFlightWriteBytesExceeded {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to decode logical plan from substrait"))]
SubstraitDecodeLogicalPlan {
#[snafu(implicit)]
@@ -369,6 +363,14 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to acquire more permits from limiter"))]
AcquireLimiter {
#[snafu(source)]
error: tokio::sync::AcquireError,
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -444,13 +446,13 @@ impl ErrorExt for Error {
Error::TableOperation { source, .. } => source.status_code(),
Error::InFlightWriteBytesExceeded { .. } => StatusCode::RateLimited,
Error::DataFusion { error, .. } => datafusion_status_code::<Self>(error, None),
Error::Cancelled { .. } => StatusCode::Cancelled,
Error::StatementTimeout { .. } => StatusCode::Cancelled,
Error::AcquireLimiter { .. } => StatusCode::Internal,
}
}

View File

@@ -19,7 +19,10 @@ use client::inserter::{Context, InsertOptions, Inserter};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_PRIVATE_SCHEMA_NAME};
use common_error::ext::BoxedError;
use common_event_recorder::error::{InsertEventsSnafu, Result};
use common_event_recorder::{build_row_inserts_request, group_events_by_type, Event, EventHandler};
use common_event_recorder::{
build_row_inserts_request, group_events_by_type, Event, EventHandler,
DEFAULT_COMPACTION_TIME_WINDOW,
};
use common_frontend::slow_query_event::SLOW_QUERY_EVENT_TYPE;
use datafusion::common::HashMap;
use operator::statement::{InserterImpl, StatementExecutorRef};
@@ -47,6 +50,7 @@ impl EventHandlerImpl {
Some(InsertOptions {
ttl: slow_query_ttl,
append_mode: true,
twcs_compaction_time_window: Some(DEFAULT_COMPACTION_TIME_WINDOW),
}),
)) as _,
)]),
@@ -55,6 +59,7 @@ impl EventHandlerImpl {
Some(InsertOptions {
ttl: global_ttl,
append_mode: true,
twcs_compaction_time_window: Some(DEFAULT_COMPACTION_TIME_WINDOW),
}),
)),
}

Some files were not shown because too many files have changed in this diff.